diff --git a/dev/404.html b/dev/404.html
deleted file mode 100644
index 78e5f544..00000000
--- a/dev/404.html
+++ /dev/null
@@ -1,958 +0,0 @@
BrickFlow

404 - Not found

\ No newline at end of file
diff --git a/dev/api/airflow_external_task_dependency/index.html b/dev/api/airflow_external_task_dependency/index.html
deleted file mode 100644
index 9196a164..00000000
--- a/dev/api/airflow_external_task_dependency/index.html
+++ /dev/null
@@ -1,1889 +0,0 @@
AirflowTaskDependencySensor - BrickFlow

Attributes


Classes


brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule

Functions

get_schedule(wf_id: str, **args: str)

Function that the sensors defined while deriving this class should override.

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def get_schedule(self, wf_id: str, **args):
    """
    Function that the sensors defined while deriving this class should
    override.
    """
    raise Exception("Override me.")
get_task_run_status(wf_id: str, task_id: str, run_date: str = None, cluster_id: str = None, **args: str)

Function that the sensors defined while deriving this class should override.

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def get_task_run_status(
    self, wf_id: str, task_id: str, run_date=None, cluster_id=None, **args
):
    """
    Function that the sensors defined while deriving this class should
    override.
    """
    raise Exception("Override me.")
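Both methods raise until a subclass overrides them, so MapDagSchedule acts as an abstract base. A minimal sketch of a concrete subclass (the class name and returned values below are hypothetical, for illustration only):

class StaticDagSchedule(MapDagSchedule):
    # hypothetical subclass that returns canned values
    def get_schedule(self, wf_id: str, **args):
        return "0 2 * * *"  # cron expression for the workflow

    def get_task_run_status(
        self, wf_id: str, task_id: str, run_date=None, cluster_id=None, **args
    ):
        return "success"  # pretend the upstream task always succeeded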

brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper(okta_conn_id: str)

Bases: MapDagSchedule

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def __init__(self, okta_conn_id: str):
    self._okta_conn: Connection = Connection.get_connection_from_secrets(
        okta_conn_id
    )

Functions

get_access_token() -> str

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def get_access_token(self) -> str:
    okta_url = self.get_okta_url()
    client_id = self.get_okta_client_id()
    client_secret = self.get_okta_client_secret()

    okta_url = os.getenv("OKTA_URL", okta_url)
    payload = (
        "client_id="
        + client_id
        + "&client_secret="
        + client_secret
        + "&grant_type=client_credentials"
    )
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
        "cache-control": "no-cache",
    }
    response = requests.post(okta_url, data=payload, headers=headers, timeout=600)
    if (
        response.status_code < HTTPStatus.OK
        or response.status_code > HTTPStatus.PARTIAL_CONTENT
    ):
        log.error(
            "Failed request to Okta for JWT status_code={} response={} client_id={}".format(
                response.status_code, response.text, client_id
            )
        )
    token_data = response.json()["access_token"]
    return token_data
get_airflow_api_url(cluster_id: str) -> str

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def get_airflow_api_url(self, cluster_id: str) -> str:
    # TODO: templatize this to a env variable
    base_api_url = f"https://proxy.us-east-1.map.nike.com/{cluster_id}"
    return base_api_url
get_okta_client_id() -> str

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def get_okta_client_id(self) -> str:
    return self._okta_conn.login
get_okta_client_secret() -> str

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def get_okta_client_secret(self) -> str:
    return self._okta_conn.get_password()
get_okta_url() -> str

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def get_okta_url(self) -> str:
    conn_type = self._okta_conn.conn_type
    host = self._okta_conn.host
    schema = self._okta_conn.schema
    return f"{conn_type}://{host}/{schema}"
get_schedule(wf_id: str, **kwargs: str)

Get the workflow's schedule as cron syntax.

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def get_schedule(self, wf_id: str, **kwargs):
    """
    get work flow schedule cron syntax
    """
    raise Exception("Do not have implementation")
get_task_run_status(wf_id: str, task_id: str, run_date: str = None, cluster_id: str = None, **args: str)

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def get_task_run_status(
    self, wf_id: str, task_id: str, run_date=None, cluster_id=None, **args
):
    token_data = self.get_access_token()
    api_url = self.get_airflow_api_url(cluster_id)
    version_nr = self.get_version(cluster_id)
    dag_id = wf_id
    headers = {
        "Content-Type": "application/json",
        "cache-control": "no-cache",
        "Authorization": "Bearer " + token_data,
    }
    o_task_status = "UKN"
    session = requests.Session()
    retries = Retry(
        total=5, backoff_factor=1, status_forcelist=[502, 503, 504, 500]
    )
    session.mount("https://", HTTPAdapter(max_retries=retries))
    if version_nr.startswith("1."):
        log.info("this is 1.x cluster")
        url = (
            api_url
            + "/api/experimental"
            + "/dags/"
            + dag_id
            + "/dag_runs/"
            + run_date
            + "/tasks/"
            + task_id
        )
    else:
        url = (
            api_url
            + "/api/v1/dags/"
            + dag_id
            + "/dagRuns/scheduled__"
            + run_date
            + "/taskInstances/"
            + task_id
        )

    log.info(f"url= {url.replace(' ', '')}")
    response = session.get(url.replace(" ", ""), headers=headers)

    log.info(
        f"response.status_code= {response.status_code} response.text= {response.text}"
    )
    if response.status_code == 200:
        log.info(f"response= {response.text}")
        json_obj = json.loads(response.text)
        if type(json_obj) == dict:
            o_task_status = json_obj["state"]

        return o_task_status

    return o_task_status
get_version(cluster_id: str) -> str

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def get_version(self, cluster_id: str) -> str:
    session = requests.Session()
    retries = Retry(
        total=10, backoff_factor=1, status_forcelist=[502, 503, 504, 500]
    )
    session.mount("https://", HTTPAdapter(max_retries=retries))
    version_check_url = (
        self.get_airflow_api_url(cluster_id) + "/admin/rest_api/api?api=version"
    )
    logging.info(version_check_url)
    otoken = self.get_access_token()
    headers = {"Authorization": "Bearer " + otoken, "Accept": "application/json"}
    out_version = "UKN"
    response = session.get(version_check_url, headers=headers, verify=False)
    if response.status_code == HTTPStatus.OK:
        out_version = response.json()["output"]
    log.info(response.text.encode("utf8"))
    session.close()
    return out_version
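A hedged usage sketch for the helper; the connection id, DAG, task, and cluster names are placeholders and assume an Airflow connection holding the Okta client credentials:

helper = MapDagScheduleHelper("okta_conn")    # placeholder connection id
status = helper.get_task_run_status(
    wf_id="upstream_dag",                     # placeholder DAG id
    task_id="upstream_task",                  # placeholder task id
    run_date="2024-01-01T00:00:00+00:00",     # execution date of the run
    cluster_id="my-cluster",                  # placeholder MAP cluster id
)
print(status)  # e.g. "success", or "UKN" if the lookup did not resolve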
brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor(external_dag_id, external_task_id, okta_conn_id, allowed_states=None, execution_delta=None, execution_delta_json=None, cluster_id=None, *args, **kwargs)

Bases: BaseSensorOperator

Source code in brickflow_plugins/airflow/operators/external_tasks.py

def __init__(
    self,
    external_dag_id,
    external_task_id,
    okta_conn_id,
    allowed_states=None,
    execution_delta=None,
    execution_delta_json=None,
    cluster_id=None,
    *args,
    **kwargs,
):
    super(TaskDependencySensor, self).__init__(*args, **kwargs)
    self.okta_conn_id = okta_conn_id
    self.allowed_states = allowed_states or ["success"]

    if execution_delta_json and execution_delta:
        raise Exception(
            "Only one of `execution_date` or `execution_delta_json` maybe provided to Sensor; not more than one."
        )

    self.external_dag_id = external_dag_id
    self.external_task_id = external_task_id
    self.allowed_states = allowed_states
    self.execution_delta = execution_delta
    self.execution_delta_json = execution_delta_json
    self.cluster_id = cluster_id

    self._poke_count = 0
    self.dbx_wf_id = kwargs.get("dbx_wf_id")
Attributes

allowed_states = allowed_states (instance-attribute)
cluster_id = cluster_id (instance-attribute)
dbx_wf_id = kwargs.get('dbx_wf_id') (instance-attribute)
execution_delta = execution_delta (instance-attribute)
execution_delta_json = execution_delta_json (instance-attribute)
external_dag_id = external_dag_id (instance-attribute)
external_task_id = external_task_id (instance-attribute)
okta_conn_id = okta_conn_id (instance-attribute)

Functions

poke(context)

Source code in brickflow_plugins/airflow/operators/external_tasks.py
def poke(self, context):
    log.info(f"executing poke.. {self._poke_count}")
    self._poke_count = self._poke_count + 1
    logging.info("Poking.. {0} round".format(str(self._poke_count)))

    exec_time = context["execution_date"]

    task_status = MapDagScheduleHelper(self.okta_conn_id).get_task_run_status(
        wf_id=self.external_dag_id,
        task_id=self.external_task_id,
        run_date=exec_time,
        cluster_id=self.cluster_id,
    )
    log.info(f"task_status= {task_status}")

    if task_status not in self.allowed_states:
        count = 0
    else:
        count = 1

    return count
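A hedged wiring sketch; the ids below are placeholders, and keyword arguments such as task_id and poke_interval come from the Airflow BaseOperator/BaseSensorOperator base classes:

sensor = TaskDependencySensor(
    task_id="wait_for_upstream",       # Airflow task id for this sensor
    external_dag_id="upstream_dag",    # placeholder DAG to wait on
    external_task_id="upstream_task",  # placeholder task inside that DAG
    okta_conn_id="okta_conn",          # placeholder Okta connection id
    cluster_id="my-cluster",           # placeholder MAP cluster id
    poke_interval=60,                  # seconds between pokes
)
# poke() returns 1 once the upstream task reaches an allowed state, else 0
sensor.poke(context={"execution_date": "2024-01-01T00:00:00+00:00"})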
\ No newline at end of file
diff --git a/dev/api/airflow_native_operators/index.html b/dev/api/airflow_native_operators/index.html
deleted file mode 100644
index 502dc3ed..00000000
--- a/dev/api/airflow_native_operators/index.html
+++ /dev/null
@@ -1,1237 +0,0 @@
AirflowNativeOperators - BrickFlow
Attributes

Classes
brickflow_plugins.airflow.operators.native_operators.BashOperatorModifier

Bases: OperatorModifier

Functions

modify(operator: BashOperator, task: Task, workflow: Workflow) -> Optional[BashOperator]

Source code in brickflow_plugins/airflow/operators/native_operators.py
@check_if(BashOperator)
def modify(
    self, operator: BashOperator, task: Task, workflow: Workflow
) -> Optional["BashOperator"]:
    f = types.MethodType(_bash_execute, operator)
    operator.execute = f
    operator.on_kill = _bash_empty_on_kill
    return operator
brickflow_plugins.airflow.operators.native_operators.BranchPythonOperatorModifier

Bases: OperatorModifier

Functions

modify(operator: BranchPythonOperator, task: Task, workflow: Workflow) -> Optional[BranchPythonOperator]

Source code in brickflow_plugins/airflow/operators/native_operators.py
@check_if(BranchPythonOperator)
def modify(
    self, operator: BranchPythonOperator, task: Task, workflow: Workflow
) -> Optional["BranchPythonOperator"]:
    f = types.MethodType(_skip_all_except, operator)
    operator.skip_all_except = f
    return operator
brickflow_plugins.airflow.operators.native_operators.ShortCircuitOperatorModifier

Bases: OperatorModifier

Functions

modify(operator: ShortCircuitOperator, task: Task, workflow: Workflow) -> Optional[ShortCircuitOperator]

Source code in brickflow_plugins/airflow/operators/native_operators.py
@check_if(ShortCircuitOperator)
def modify(
    self, operator: ShortCircuitOperator, task: Task, workflow: Workflow
) -> Optional["ShortCircuitOperator"]:
    f = types.MethodType(_short_circuit_execute, operator)
    operator.execute = f
    return operator
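All three modifiers rely on the same trick: types.MethodType rebinds a module-level function as a bound method on one operator instance, swapping its behavior without subclassing. A self-contained sketch of that pattern (the names here are illustrative, not Brickflow APIs):

import types

class Greeter:
    def greet(self):
        return "hello"

def _patched_greet(self):
    # `self` is the instance the function was bound to
    return "patched hello"

g = Greeter()
g.greet = types.MethodType(_patched_greet, g)  # rebinds on this instance only
print(g.greet())          # -> "patched hello"
print(Greeter().greet())  # other instances are untouched -> "hello"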
\ No newline at end of file
diff --git a/dev/api/cli/index.html b/dev/api/cli/index.html
deleted file mode 100644
index ab4678a0..00000000
--- a/dev/api/cli/index.html
+++ /dev/null
@@ -1,2032 +0,0 @@
CLI - BrickFlow
Attributes

Classes
brickflow.cli.CdktfCmd

Bases: click.Group

Functions

get_command(ctx: click.Context, cmd_name: str) -> Optional[click.Command]

Source code in brickflow/cli/__init__.py
def get_command(self, ctx: click.Context, cmd_name: str) -> Optional[click.Command]:
    if cmd_name == BrickflowDeployMode.CDKTF.value:
        return cdktf_command()
    elif cmd_name == BrickflowDeployMode.BUNDLE.value:
        return bundles_proxy_command()
    # elif cmd_name in ["deploy", "diff"]:
    #     return cdktf_command(cmd_name)
    else:
        rv = click.Group.get_command(self, ctx, cmd_name)
        if rv is not None:
            return rv
        raise ctx.fail(f"No such command '{cmd_name}'.")
Functions

brickflow.cli.bundle() -> None

CLI for proxying to databricks bundles cli.

Source code in brickflow/cli/__init__.py
@cli.command
def bundle() -> None:
    """CLI for proxying to databricks bundles cli."""
    # Hack for having bundle show up as a command in brickflow
    # with documentation.
    pass  # pragma: no cover
brickflow.cli.bundles_proxy_command() -> click.Command

Source code in brickflow/cli/__init__.py
def bundles_proxy_command() -> click.Command:
    def run_bundle_command(args: Optional[List[str]] = None, **_: Any) -> None:
        bundle_cli_setup()
        bundle_cli = config(
            BrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_EXEC.value, "databricks"
        )
        log_important_versions(bundle_cli)
        exec_command(bundle_cli, "bundle", args or [])

    @click.command(
        name="bundles_cmd",
        short_help="CLI for proxying to databricks bundles cli..",
        context_settings={"ignore_unknown_options": True},
        add_help_option=False,
    )
    @click.argument("args", nargs=-1)
    def cmd(args: List[str]) -> None:
        # check to make sure you are in project root and then set python path to whole dir
        run_bundle_command(args=args)

    return cmd
brickflow.cli.cdktf() -> None

CLI for proxying to cdktf cli.

Source code in brickflow/cli/__init__.py
@cli.command
def cdktf() -> None:
    """CLI for proxying to cdktf cli."""
    # Hack for having cdktf show up as a command in brickflow
    # with documentation.
    pass  # pragma: no cover
brickflow.cli.cdktf_command(base_command: Optional[str] = None) -> click.Command

Source code in brickflow/cli/__init__.py
def cdktf_command(base_command: Optional[str] = None) -> click.Command:
    @click.command(
        name="cdktf_cmd",
        short_help="CLI for proxying to CDKTF cli.",
        context_settings={"ignore_unknown_options": True},
        add_help_option=False,
        deprecated=True,
    )
    @click.argument("args", nargs=-1)
    def cmd(args: Tuple[str]) -> None:
        # check to make sure you are in project root and then set python path to whole dir
        exec_cdktf_command(base_command, args)

    return cmd
brickflow.cli.cdktf_env_set_options(f: Callable) -> Callable

Source code in brickflow/cli/__init__.py
def cdktf_env_set_options(f: Callable) -> Callable:
    def local_mode_callback(ctx: click.Context, param: str, value: Any) -> None:  # noqa
        # pylint: disable=unused-argument
        if value is not None and value is True:
            _ilog.info(
                "Configuring environment to %s...",
                BrickflowDefaultEnvs.LOCAL.value,
            )
            os.environ[
                BrickflowEnvVars.BRICKFLOW_ENV.value
            ] = BrickflowDefaultEnvs.LOCAL.value

    def deploy_only_workflows(
        ctx: click.Context, param: str, value: Any
    ) -> None:  # noqa
        # pylint: disable=unused-argument
        if value:
            for file in value:
                if file[-3:] != ".py":
                    raise ClickException("Should pass only python files as workflows")
            _ilog.info("Brickflow will only deploy workflows: %s", ", ".join(value))
            os.environ[
                BrickflowEnvVars.BRICKFLOW_DEPLOY_ONLY_WORKFLOWS.value
            ] = ",".join(value)

    def set_up_cdktf_for_workflow_dir(
        ctx: click.Context, param: str, value: Any  # noqa
    ) -> None:
        if value is not None:
            return value

    options = [
        click.option(
            "--local-mode",
            "-l",
            is_flag=True,
            callback=local_mode_callback,
            help="Set the environment flag to local and other components [TBD] are disabled in local mode.",
        ),
        click.option(
            "--workflows-dir",
            "-wd",
            type=click.Path(exists=True, file_okay=False),
            prompt=INTERACTIVE_MODE,
            callback=set_up_cdktf_for_workflow_dir,
            help="Provide the workflow directory that has to be deployed",
        ),
        click.option(
            "--workflow",
            "-w",
            type=str,
            multiple=True,
            callback=deploy_only_workflows,
            help="""Provide the workflow file names which you want to deploy, each file name separated by space!
                    Example: bf deploy -p DEFAULT -l -w wf1.py -w wf2.py""",
        ),
        click.option(
            "--env",
            "-e",
            default=BrickflowDefaultEnvs.LOCAL.value,
            type=str,
            callback=bind_env_var(BrickflowEnvVars.BRICKFLOW_ENV.value),
            help="Set the environment value, certain tags [TBD] get added to the workflows based on this value.",
        ),
        click.option(
            "--repo-url",
            "-r",
            default=None,
            type=str,
            callback=bind_env_var(BrickflowEnvVars.BRICKFLOW_GIT_REPO.value),
            help="The github url in which to run brickflow with.",
        ),
        click.option(
            "--git-ref",
            default=None,
            type=str,
            callback=bind_env_var(BrickflowEnvVars.BRICKFLOW_GIT_REF.value),
            help="The commit/tag/branch to use in github.",
        ),
        click.option(
            "--git-provider",
            default=None,
            type=str,
            callback=bind_env_var(BrickflowEnvVars.BRICKFLOW_GIT_PROVIDER.value),
            help="The github provider for brickflow this is used for configuring github on DBX jobs.",
        ),
        click.option(
            "--profile",
            "-p",
            default=None,
            type=str,
            callback=bind_env_var(
                BrickflowEnvVars.BRICKFLOW_DATABRICKS_CONFIG_PROFILE.value
            ),
            help="The databricks profile to use for authenticating to databricks during deployment.",
        ),
    ]
    for option in options:
        f = option(f)
    return f
brickflow.cli.cli() -> None

CLI for managing Databricks Workflows

Source code in brickflow/cli/__init__.py
@click.group(invoke_without_command=True, no_args_is_help=True, cls=CdktfCmd)
@click.version_option(prog_name="brickflow")
def cli() -> None:
    """CLI for managing Databricks Workflows"""
brickflow.cli.deploy(**kwargs: Any) -> None

CLI for deploying workflow projects.

Source code in brickflow/cli/__init__.py
@cli.command
@click.option(
    "--auto-approve",
    type=bool,
    is_flag=True,
    show_default=True,
    default=False,
    help="Auto approve brickflow pipeline without being prompted to approve.",
)
@click.option(
    "--deploy-mode",
    type=click.Choice(["cdktf", "bundle"]),
    show_default=True,
    default="cdktf",
    help="Which deployment framework to use to deploy.",
)
@click.option(
    "--force-acquire-lock",
    type=bool,
    is_flag=True,
    show_default=True,
    default=False,
    help="Force acquire lock for databricks bundles deploy.",
)
@cdktf_env_set_options
def deploy(**kwargs: Any) -> None:
    """CLI for deploying workflow projects."""
    # Hack for having cdktf show up as a command in brickflow
    # with documentation.
    deploy_mode = get_deployment_mode(**kwargs)
    if deploy_mode == BrickflowDeployMode.CDKTF:
        make_cdktf_json(**kwargs)
        exec_cdktf_command("deploy", get_cdktf_specific_args(**kwargs))
    else:
        disable_project_name_in_env()
        bundle_deploy(**kwargs)
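Click commands like these can be exercised without touching a workspace via Click's test runner; a minimal sketch (assumes only that the `cli` group above is importable):

from click.testing import CliRunner

runner = CliRunner()
# print the deploy command's options without running a deployment
result = runner.invoke(cli, ["deploy", "--help"])
print(result.output)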
brickflow.cli.destroy(**kwargs: Any) -> None

CLI for destroying workflow projects.

Source code in brickflow/cli/__init__.py
@cli.command
@click.option(
    "--auto-approve",
    type=bool,
    is_flag=True,
    show_default=True,
    default=False,
    help="Auto approve brickflow pipeline without being prompted to approve.",
)
@click.option(
    "--deploy-mode",
    type=click.Choice(["cdktf", "bundle"]),
    show_default=True,
    default="cdktf",
    help="Which deployment framework to use to deploy.",
)
@click.option(
    "--force-acquire-lock",
    type=bool,
    is_flag=True,
    show_default=True,
    default=False,
    help="Force acquire lock for databricks bundles destroy.",
)
@cdktf_env_set_options
def destroy(**kwargs: Any) -> None:
    """CLI for destroying workflow projects."""
    # Hack for having cdktf show up as a command in brickflow
    # with documentation.
    deploy_mode = get_deployment_mode(**kwargs)

    if deploy_mode == BrickflowDeployMode.CDKTF:
        make_cdktf_json(**kwargs)
        exec_cdktf_command("destroy", get_cdktf_specific_args(**kwargs))
    else:
        disable_project_name_in_env()
        bundle_destroy(**kwargs)
brickflow.cli.diff(**kwargs: Any) -> None

CLI for identifying diff in projects (only cdktf supported).

Source code in brickflow/cli/__init__.py
@cli.command
@cdktf_env_set_options
def diff(**kwargs: Any) -> None:
    """CLI for identifying diff in projects (only cdktf supported)."""
    # Hack for having cdktf show up as a command in brickflow
    # with documentation.
    make_cdktf_json(**kwargs)
    exec_cdktf_command("diff", [])
brickflow.cli.disable_project_name_in_env() -> None

Source code in brickflow/cli/__init__.py
def disable_project_name_in_env() -> None:
    # TODO: delete this when deploy commands are gone
    # used for legacy bundles deploy and destroy commands
    # disable multiple projects in same directory
    os.environ[BrickflowEnvVars.BRICKFLOW_USE_PROJECT_NAME.value] = "False"
brickflow.cli.docs() -> None

Use to open docs in your browser...

Source code in brickflow/cli/__init__.py
@cli.command
def docs() -> None:
    """Use to open docs in your browser..."""
    docs_site = "https://verbose-garbanzo-6b8a1ae2.pages.github.io/"
    webbrowser.open(docs_site, new=2)
    click.echo(f"Opening browser for docs... site: {docs_site}")
brickflow.cli.get_cdktf_specific_args(**kwargs: Dict[str, Any]) -> List[str]

Source code in brickflow/cli/__init__.py
def get_cdktf_specific_args(**kwargs: Dict[str, Any]) -> List[str]:
    args = []
    if kwargs.get("auto_approve", False) is True:
        args.append("--auto-approve")
    return args
brickflow.cli.get_deployment_mode(**kwargs: Dict[str, Any]) -> BrickflowDeployMode

Source code in brickflow/cli/__init__.py
def get_deployment_mode(**kwargs: Dict[str, Any]) -> BrickflowDeployMode:
    # set deployment mode for cdktf or bundle
    os.environ[BrickflowEnvVars.BRICKFLOW_DEPLOYMENT_MODE.value] = str(
        kwargs.get("deploy_mode", BrickflowDeployMode.CDKTF.value)
    )
    if (
        kwargs.get("deploy_mode", BrickflowDeployMode.CDKTF.value)
        == BrickflowDeployMode.CDKTF.value
    ):
        return BrickflowDeployMode.CDKTF
    else:
        return BrickflowDeployMode.BUNDLE
brickflow.cli.make_cdktf_json(**kwargs: Any) -> None

Source code in brickflow/cli/__init__.py
def make_cdktf_json(**kwargs: Any) -> None:
    wd: Optional[str] = kwargs.get("workflows_dir")
    if wd is None:
        raise ValueError(
            "workflows_dir not set, please set it using --workflows-dir or -wd"
        )
    idempotent_cdktf_out(wd)
brickflow.cli.sync(**kwargs: Any) -> None

Synchronize your bundle tree to databricks workspace (only supported by bundle deployment mode).

Source code in brickflow/cli/__init__.py
@cli.command
@click.option(
    "--deploy-mode",
    type=click.Choice(["bundle"]),
    show_default=True,
    default="bundle",
    help="Which deployment framework to use to deploy.",
)
@click.option(
    "--watch",
    type=bool,
    is_flag=True,
    show_default=True,
    default=False,
    help="Enable filewatcher to sync files over.",
)
@click.option(
    "--full",
    type=bool,
    is_flag=True,
    show_default=True,
    default=False,
    help="Run a full sync.",
)
@click.option(
    "--interval-duration",
    type=str,
    show_default=True,
    default=None,
    help="File system polling interval (for --watch).",
)
@click.option(
    "--debug",
    type=str,
    show_default=True,
    default=None,
    help="File system polling interval (for --watch).",
)
@cdktf_env_set_options
def sync(**kwargs: Any) -> None:
    """Synchronize your bundle tree to databricks workspace (only supported by bundle deployment mode)."""
    deploy_mode = get_deployment_mode(**kwargs)
    if deploy_mode == BrickflowDeployMode.BUNDLE:
        bundle_sync(**kwargs)
    else:
        raise ClickException(
            "Unsupported deploy mode for sync; currently only supports bundle deploy mode."
        )
\ No newline at end of file
diff --git a/dev/api/compute/index.html b/dev/api/compute/index.html
deleted file mode 100644
index 3aa73a62..00000000
--- a/dev/api/compute/index.html
+++ /dev/null
@@ -1,3910 +0,0 @@
Compute - BrickFlow
Classes
brickflow.engine.compute.Cluster (dataclass)

Attributes

aws_attributes: Optional[Dict[str, Any]] = None (class-attribute, instance-attribute)
custom_tags: Optional[Dict[str, str]] = None (class-attribute, instance-attribute)
data_security_mode: str = DataSecurityMode.SINGLE_USER (class-attribute, instance-attribute)
dlt_auto_scale_mode: Optional[str] = None (class-attribute, instance-attribute)
driver_instance_pool_id: Optional[str] = None (class-attribute, instance-attribute)
driver_node_type_id: Optional[str] = None (class-attribute, instance-attribute)
enable_elastic_disk: Optional[bool] = None (class-attribute, instance-attribute)
existing_cluster_id: Optional[str] = None (class-attribute, instance-attribute)
init_scripts: Optional[List[Dict[str, str]]] = None (class-attribute, instance-attribute)
instance_pool_id: Optional[str] = None (class-attribute, instance-attribute)
is_new_job_cluster: bool (property)
job_task_field_dict: Dict[str, str] (property)
max_workers: Optional[int] = None (class-attribute, instance-attribute)
min_workers: Optional[int] = None (class-attribute, instance-attribute)
name: str (instance-attribute)
node_type_id: str (instance-attribute)
num_workers: Optional[int] = None (class-attribute, instance-attribute)
policy_id: Optional[str] = None (class-attribute, instance-attribute)
runtime_engine: Optional[Literal['STANDARD', 'PHOTON']] = None (class-attribute, instance-attribute)
spark_conf: Optional[Dict[str, str]] = None (class-attribute, instance-attribute)
spark_env_vars: Optional[Dict[str, str]] = None (class-attribute, instance-attribute)
spark_version: str (instance-attribute)

Functions

__hash__() -> int

Source code in brickflow/engine/compute.py
def __hash__(self) -> int:
    # dedupe dicts and lists which are default un hashable. Easiest way to identify dupes.
    return hash(json.dumps(self.as_dict()))
__post_init__() -> None

Source code in brickflow/engine/compute.py

def __post_init__(self) -> None:
    self.validate()
as_dict(is_dlt_cluster: bool = False, allowed_fields: Optional[List[str]] = None, remove_fields: Optional[List[str]] = None) -> Dict[str, Any]

Source code in brickflow/engine/compute.py

def as_dict(
    self,
    is_dlt_cluster: bool = False,
    allowed_fields: Optional[List[str]] = None,
    remove_fields: Optional[List[str]] = None,
) -> Dict[str, Any]:
    d = dataclasses.asdict(self)
    d = {**d, **self.autoscale(is_dlt_cluster=is_dlt_cluster)}
    # if allowed fields are provided and check if value is in set
    self.cleanup(d, allowed_fields=allowed_fields, remove_fields=remove_fields)
    return d
autoscale(is_dlt_cluster: bool = False) -> Dict[str, Any]

Source code in brickflow/engine/compute.py

def autoscale(self, is_dlt_cluster: bool = False) -> Dict[str, Any]:
    if self.min_workers is not None and self.max_workers is not None:
        resp: Dict[str, Dict[str, Optional[str | int]]] = {
            "autoscale": {
                "min_workers": self.min_workers,
                "max_workers": self.max_workers,
            }
        }
        if is_dlt_cluster is True:
            resp["autoscale"]["mode"] = self.dlt_auto_scale_mode
        return resp
    return {}
cleanup(d: Dict[str, Any], allowed_fields: Optional[List[str]] = None, remove_fields: Optional[List[str]] = None) -> None (staticmethod)

Source code in brickflow/engine/compute.py

@staticmethod
def cleanup(
    d: Dict[str, Any],
    allowed_fields: Optional[List[str]] = None,
    remove_fields: Optional[List[str]] = None,
) -> None:
    d.pop("min_workers", None)
    d.pop("max_workers", None)
    d.pop("dlt_auto_scale_mode", None)
    d.pop("existing_cluster_id", None)
    remove_fields = remove_fields or []
    for k in list(d.keys()):
        # if allowed fields are provided and check if value is in set
        if allowed_fields and k not in allowed_fields:
            d.pop(k, None)
        if k in remove_fields:
            d.pop(k, None)
from_existing_cluster(existing_cluster_id: str) -> 'Cluster' (classmethod)

Source code in brickflow/engine/compute.py

@classmethod
def from_existing_cluster(cls, existing_cluster_id: str) -> "Cluster":
    # just some stub value
    return Cluster(
        existing_cluster_id,
        existing_cluster_id,
        existing_cluster_id,
        existing_cluster_id=existing_cluster_id,
    )
validate() -> None

Source code in brickflow/engine/compute.py

def validate(self) -> None:
    assert not (
        self.num_workers is not None
        and self.min_workers is not None
        and self.max_workers is not None
    ), "Num workers should not be provided with min and max workers"
    assert not (
        (self.min_workers is None and self.max_workers is not None)
        or (self.min_workers is not None and self.max_workers is None)
    ), "Both min workers and max workers should be present if one is provided"
    # noinspection PyTypeChecker
    assert not (
        (self.min_workers is not None and self.max_workers is not None)
        and (self.min_workers > self.max_workers)
    ), "Min workers should be less than max workers"
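A minimal construction sketch using keyword arguments so dataclass field order does not matter; the node type string is a placeholder:

job_cluster = Cluster(
    name="example-cluster",                           # placeholder name
    spark_version=Runtimes.RUNTIME_11_3_X_SCALA2_12,  # '11.3.x-scala2.12'
    node_type_id="m5.xlarge",                         # placeholder node type
    min_workers=1,                                    # enables the autoscale block
    max_workers=4,
)
# validate() runs via __post_init__; as_dict() folds min/max into "autoscale"
print(job_cluster.as_dict())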
brickflow.engine.compute.Runtimes

Attributes

All of the following are class attributes:

RUNTIME_10_4_X_AARCH64_PHOTON_SCALA2_12_LTS = '10.4.x-aarch64-photon-scala2.12'
RUNTIME_10_4_X_AARCH64_SCALA2_12_LTS = '10.4.x-aarch64-scala2.12'
RUNTIME_10_4_X_CPU_ML_SCALA2_12_LTS = '10.4.x-cpu-ml-scala2.12'
RUNTIME_10_4_X_GPU_ML_SCALA2_12_LTS = '10.4.x-gpu-ml-scala2.12'
RUNTIME_10_4_X_PHOTON_SCALA2_12_LTS = '10.4.x-photon-scala2.12'
RUNTIME_10_4_X_SCALA2_12_LTS = '10.4.x-scala2.12'
RUNTIME_10_5_X_AARCH64_PHOTON_SCALA2_12 = '10.5.x-aarch64-photon-scala2.12'
RUNTIME_10_5_X_AARCH64_SCALA2_12 = '10.5.x-aarch64-scala2.12'
RUNTIME_10_5_X_CPU_ML_SCALA2_12 = '10.5.x-cpu-ml-scala2.12'
RUNTIME_10_5_X_GPU_ML_SCALA2_12 = '10.5.x-gpu-ml-scala2.12'
RUNTIME_10_5_X_PHOTON_SCALA2_12 = '10.5.x-photon-scala2.12'
RUNTIME_10_5_X_SCALA2_12 = '10.5.x-scala2.12'
RUNTIME_11_0_X_AARCH64_PHOTON_SCALA2_12 = '11.0.x-aarch64-photon-scala2.12'
RUNTIME_11_0_X_AARCH64_SCALA2_12 = '11.0.x-aarch64-scala2.12'
RUNTIME_11_0_X_CPU_ML_SCALA2_12 = '11.0.x-cpu-ml-scala2.12'
RUNTIME_11_0_X_GPU_ML_SCALA2_12 = '11.0.x-gpu-ml-scala2.12'
RUNTIME_11_0_X_PHOTON_SCALA2_12 = '11.0.x-photon-scala2.12'
RUNTIME_11_0_X_SCALA2_12 = '11.0.x-scala2.12'
RUNTIME_11_1_X_AARCH64_PHOTON_SCALA2_12 = '11.1.x-aarch64-photon-scala2.12'
RUNTIME_11_1_X_AARCH64_SCALA2_12 = '11.1.x-aarch64-scala2.12'
RUNTIME_11_1_X_CPU_ML_SCALA2_12 = '11.1.x-cpu-ml-scala2.12'
RUNTIME_11_1_X_GPU_ML_SCALA2_12 = '11.1.x-gpu-ml-scala2.12'
RUNTIME_11_1_X_PHOTON_SCALA2_12 = '11.1.x-photon-scala2.12'
RUNTIME_11_1_X_SCALA2_12 = '11.1.x-scala2.12'
RUNTIME_11_2_X_AARCH64_PHOTON_SCALA2_12 = '11.2.x-aarch64-photon-scala2.12'
RUNTIME_11_2_X_AARCH64_SCALA2_12 = '11.2.x-aarch64-scala2.12'
RUNTIME_11_2_X_CPU_ML_SCALA2_12 = '11.2.x-cpu-ml-scala2.12'
RUNTIME_11_2_X_GPU_ML_SCALA2_12 = '11.2.x-gpu-ml-scala2.12'
RUNTIME_11_2_X_PHOTON_SCALA2_12 = '11.2.x-photon-scala2.12'
RUNTIME_11_2_X_SCALA2_12 = '11.2.x-scala2.12'
RUNTIME_11_3_X_AARCH64_PHOTON_SCALA2_12 = '11.3.x-aarch64-photon-scala2.12'
RUNTIME_11_3_X_AARCH64_SCALA2_12 = '11.3.x-aarch64-scala2.12'
RUNTIME_11_3_X_CPU_ML_SCALA2_12 = '11.3.x-cpu-ml-scala2.12'
RUNTIME_11_3_X_GPU_ML_SCALA2_12 = '11.3.x-gpu-ml-scala2.12'
RUNTIME_11_3_X_PHOTON_SCALA2_12 = '11.3.x-photon-scala2.12'
RUNTIME_11_3_X_SCALA2_12 = '11.3.x-scala2.12'
RUNTIME_7_3_X_CPU_ML_SCALA2_12_LTS = '7.3.x-cpu-ml-scala2.12'
RUNTIME_7_3_X_GPU_ML_SCALA2_12_LTS = '7.3.x-gpu-ml-scala2.12'
RUNTIME_7_3_X_HLS_SCALA2_12_LTS = '7.3.x-hls-scala2.12'
RUNTIME_7_3_X_SCALA2_12_LTS = '7.3.x-scala2.12'
RUNTIME_9_1_X_AARCH64_SCALA2_12_LTS = '9.1.x-aarch64-scala2.12'
RUNTIME_9_1_X_CPU_ML_SCALA2_12_LTS = '9.1.x-cpu-ml-scala2.12'
RUNTIME_9_1_X_GPU_ML_SCALA2_12_LTS = '9.1.x-gpu-ml-scala2.12'
RUNTIME_9_1_X_PHOTON_SCALA2_12_LTS = '9.1.x-photon-scala2.12'
RUNTIME_9_1_X_SCALA2_12_LTS = '9.1.x-scala2.12'
\ No newline at end of file
diff --git a/dev/api/context/index.html b/dev/api/context/index.html
deleted file mode 100644
index 38a6c146..00000000
--- a/dev/api/context/index.html
+++ /dev/null
@@ -1,2864 +0,0 @@
Context - BrickFlow
Attributes

brickflow.context.context.BRANCH_SKIP_EXCEPT = 'branch_skip_except' (module-attribute)
brickflow.context.context.RETURN_VALUE_KEY = 'return_value' (module-attribute)
brickflow.context.context.SKIP_EXCEPT_HACK = 'brickflow_hack_skip_all' (module-attribute)
brickflow.context.context.T = TypeVar('T') (module-attribute)
brickflow.context.context.ctx = Context() (module-attribute)

Classes
brickflow.context.context.BrickflowBuiltInTaskVariables

Bases: Enum

Attributes

job_id = 'brickflow_job_id' (class-attribute, instance-attribute)
parent_run_id = 'brickflow_parent_run_id' (class-attribute, instance-attribute)
run_id = 'brickflow_run_id' (class-attribute, instance-attribute)
start_date = 'brickflow_start_date' (class-attribute, instance-attribute)
start_time = 'brickflow_start_time' (class-attribute, instance-attribute)
task_key = 'brickflow_task_key' (class-attribute, instance-attribute)
task_retry_count = 'brickflow_task_retry_count' (class-attribute, instance-attribute)
brickflow.context.context.BrickflowInternalVariables

Bases: Enum

Attributes

env = BrickflowEnvVars.BRICKFLOW_ENV.value.lower() (class-attribute, instance-attribute)
only_run_tasks = 'brickflow_internal_only_run_tasks' (class-attribute, instance-attribute)
task_id = 'brickflow_internal_task_name' (class-attribute, instance-attribute)
workflow_id = 'brickflow_internal_workflow_name' (class-attribute, instance-attribute)
workflow_prefix = 'brickflow_internal_workflow_prefix' (class-attribute, instance-attribute)
workflow_suffix = 'brickflow_internal_workflow_suffix' (class-attribute, instance-attribute)
brickflow.context.context.BrickflowTaskComs (dataclass)

Attributes

dbutils: Optional[Any] = None (class-attribute, instance-attribute)
storage: Dict[str, Any] = field(init=False, default_factory=lambda: {}) (class-attribute, instance-attribute)

Functions

get(task_id: str, key: Optional[str] = None) -> Any

Source code in brickflow/context/context.py
def get(self, task_id: str, key: Optional[str] = None) -> Any:
    if key is None:
        return BrickflowTaskComsDict(task_id=task_id, task_coms=self)
    if self.dbutils is not None:
        encoded_value = self.dbutils.jobs.taskValues.get(
            key=key, taskKey=task_id, debugValue="debug"
        )
        return BrickflowTaskComsObject.from_encoded_value(encoded_value).value
    else:
        # TODO: logging using local task coms
        encoded_value = self.storage[self._key(task_id, key)]
        return BrickflowTaskComsObject.from_encoded_value(encoded_value).value
put(task_id: str, key: str, value: Any) -> None

Source code in brickflow/context/context.py
def put(self, task_id: str, key: str, value: Any) -> None:
    encoded_value = BrickflowTaskComsObject(value).to_encoded_value
    if self.dbutils is not None:
        self.dbutils.jobs.taskValues.set(key, encoded_value)
    else:
        # TODO: logging using local task coms
        self.storage[self._key(task_id, key)] = encoded_value
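A small local sketch of the put/get round trip; with no dbutils attached, values flow through the in-memory storage dict (the task and key names are arbitrary):

coms = BrickflowTaskComs()  # dbutils defaults to None -> local storage
coms.put("task_a", "metrics", {"rows": 42})
print(coms.get("task_a", "metrics"))  # -> {'rows': 42}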
brickflow.context.context.BrickflowTaskComsDict (dataclass)

Attributes

task_coms: BrickflowTaskComs (instance-attribute)
task_id: str (instance-attribute)
brickflow.context.context.BrickflowTaskComsObject (dataclass)

Attributes

to_encoded_value: str (property)
value: Any (property)

Functions

from_encoded_value(encoded_value: Union[str, bytes]) -> BrickflowTaskComsObject (classmethod)

Source code in brickflow/context/context.py
@classmethod
def from_encoded_value(
    cls, encoded_value: Union[str, bytes]
) -> "BrickflowTaskComsObject":
    try:
        _encoded_value = (
            encoded_value
            if isinstance(encoded_value, bytes)
            else encoded_value.encode("utf-8")
        )
        b64_bytes = base64.b64decode(_encoded_value)
        return cls(pickle.loads(b64_bytes).value)
    except binascii.Error:
        _decoded_value = (
            encoded_value.decode("utf-8")
            if isinstance(encoded_value, bytes)
            else encoded_value
        )
        return cls(_decoded_value)
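A sketch of the round trip these helpers imply: to_encoded_value produces a base64 payload, and strings that fail base64 decoding fall into the binascii.Error branch and are returned verbatim (assumes to_encoded_value and from_encoded_value are symmetric):

obj = BrickflowTaskComsObject("hello")
encoded = obj.to_encoded_value  # base64-encoded payload
assert BrickflowTaskComsObject.from_encoded_value(encoded).value == "hello"

# "raw!" is not valid base64, so it is passed through unchanged
assert BrickflowTaskComsObject.from_encoded_value("raw!").value == "raw!"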
brickflow.context.context.Context() -> None

Source code in brickflow/context/context.py
def __init__(self) -> None:
    # Order of init matters todo: fix this

    self._dbutils: Optional[Any] = None
    self._spark: Optional[Any] = None
    self._task_coms: BrickflowTaskComs
    self._current_task: Optional[str] = None
    self._configure()
    self._current_project: Optional[str] = None
Attributes

current_project: Optional[str] (property)
current_task: Optional[str] (property)
dbutils: DBUtils (property)
env: str (property)
log: logging.Logger (property)
spark: SparkSession (property)
task_coms: BrickflowTaskComs (property)

Functions

dbutils_widget_get_or_else(key: str, debug: Optional[str]) -> Optional[str]

Source code in brickflow/context/context.py
@deprecated
def dbutils_widget_get_or_else(
    self, key: str, debug: Optional[str]
) -> Optional[str]:
    try:
        return self.dbutils.widgets.get(key)
    except Exception:
        # todo: log error
        return debug
get_by_env(purpose: str, *, default: Optional[T] = None, local: Optional[T] = None, dev: Optional[T] = None, non_prod: Optional[T] = None, test: Optional[T] = None, qa: Optional[T] = None, prod: Optional[T] = None, uat: Optional[T] = None, **kwargs: Optional[T]) -> Optional[T]

Source code in brickflow/context/context.py
def get_by_env(
    self,
    purpose: str,
    *,
    default: Optional[T] = None,
    local: Optional[T] = None,
    dev: Optional[T] = None,
    non_prod: Optional[T] = None,
    test: Optional[T] = None,
    qa: Optional[T] = None,
    prod: Optional[T] = None,
    uat: Optional[T] = None,
    **kwargs: Optional[T],
) -> Optional[T]:
    # deep copy without modifying kwargs
    def add_if_not_none(
        _d: Dict[str, Optional[T]], _k: str, _v: Optional[T]
    ) -> None:
        if _v is None:
            return
        _d[_k] = _v

    _dict = copy.deepcopy(kwargs)
    add_if_not_none(_dict, "local", local)
    add_if_not_none(_dict, "non_prod", non_prod)
    add_if_not_none(_dict, "dev", dev)
    add_if_not_none(_dict, "test", test)
    add_if_not_none(_dict, "qa", qa)
    add_if_not_none(_dict, "prod", prod)
    add_if_not_none(_dict, "uat", uat)
    _env = self.env
    _ilog.info("Configuring: %s; Using env: '%s' to fetch value...", purpose, _env)
    if _env not in _dict and default is None:
        raise KeyError(
            f"Configuring: {purpose}; Unable to find environment key: {_env}, "
            f"only found env definitions: {list(_dict.keys())}"
        )
    if _env not in _dict and default is not None:
        _ilog.info(
            "Configuring: %s; Found no value configured with env: '%s' using default value...",
            purpose,
            _env,
        )
    res = _dict.get(_env, default)
    return res
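A usage sketch with the module-level ctx instance; the purpose string and values are placeholders:

# resolves to "dev_catalog" when BRICKFLOW_ENV is "dev", "prod_catalog" in
# "prod", and the default everywhere else
catalog = ctx.get_by_env(
    "catalog name",
    dev="dev_catalog",
    prod="prod_catalog",
    default="local_catalog",
)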
get_parameter(key: str, debug: Optional[str] = None) -> Optional[str]

Source code in brickflow/context/context.py
def get_parameter(self, key: str, debug: Optional[str] = None) -> Optional[str]:
    try:
        return self.dbutils.widgets.get(key)
    except Exception:
        # todo: log error
        _ilog.debug("Unable to get parameter: %s from dbutils", key)
        return debug
get_return_value(task_key: Union[str, Callable]) -> Any

Source code in brickflow/context/context.py
def get_return_value(self, task_key: Union[str, Callable]) -> Any:
    task_key = task_key.__name__ if callable(task_key) else task_key
    return self.task_coms.get(task_key, RETURN_VALUE_KEY)
is_local() -> bool

Source code in brickflow/context/context.py
def is_local(self) -> bool:
    return self.env == BrickflowDefaultEnvs.LOCAL.value
job_id(*, debug: Optional[str] = None) -> Any

This function fetches the job_id value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.job_id)
def job_id(self, *, debug: Optional[str] = None) -> Any:
    """
    This function fetches the job_id value using the bind_variable decorator.
    The implementation is intentionally empty because the decorator handles the logic.
    """
    pass
parent_run_id(*, debug: Optional[str] = None) -> Any

This function fetches the parent_run_id value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.parent_run_id)
def parent_run_id(self, *, debug: Optional[str] = None) -> Any:
    """
    This function fetches the parent_run_id value using the bind_variable decorator.
    The implementation is intentionally empty because the decorator handles the logic.
    """
    pass
run_id(*, debug: Optional[str] = None) -> Any

This function fetches the run_id value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.run_id)
def run_id(self, *, debug: Optional[str] = None) -> Any:
    """
    This function fetches the run_id value using the bind_variable decorator.
    The implementation is intentionally empty because the decorator handles the logic.
    """
    pass
set_current_project(project: str) -> None

Source code in brickflow/context/context.py
def set_current_project(self, project: str) -> None:
    self._current_project = project
skip_all_except(branch_task: Union[Callable, str]) -> None

Source code in brickflow/context/context.py
def skip_all_except(self, branch_task: Union[Callable, str]) -> None:
    if self._current_task is None:
        raise RuntimeError("Current task is empty unable to skip...")
    branch_task_key = (
        branch_task.__name__
        if callable(branch_task) and hasattr(branch_task, "__name__") is True
        else branch_task
    )
    self._task_coms.put(self._current_task, BRANCH_SKIP_EXCEPT, branch_task_key)
skip_all_following() -> None

Source code in brickflow/context/context.py
def skip_all_following(self) -> None:
    if self._current_task is None:
        raise RuntimeError("Current task is empty unable to skip...")
    self._task_coms.put(self._current_task, BRANCH_SKIP_EXCEPT, SKIP_EXCEPT_HACK)
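A hedged sketch of how a branch-style task might call these from inside a running task; the task names and predicate are hypothetical:

def branch_task():
    if run_downstream():               # hypothetical predicate
        ctx.skip_all_except("task_b")  # only task_b survives the branch
    else:
        ctx.skip_all_following()       # skip every task downstream of this one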
start_date(*, debug: Optional[str] = None) -> Any

This function fetches the start_date value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.start_date)
def start_date(self, *, debug: Optional[str] = None) -> Any:
    """
    This function fetches the start_date value using the bind_variable decorator.
    The implementation is intentionally empty because the decorator handles the logic.
    """
    pass
start_time(*, debug: Optional[str] = None) -> Any

This function fetches the start_time value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.start_time)
def start_time(self, *, debug: Optional[str] = None) -> Any:
    """
    This function fetches the start_time value using the bind_variable decorator.
    The implementation is intentionally empty because the decorator handles the logic.
    """
    pass
-
-
-
- -
- -
- - - -

-task_key(*, debug: Optional[str] = None) -> Any - -

- - -
- -

This function fetches the task_key value using the bind_variable decorator. -The implementation is intentionally empty because the decorator handles the logic.

- -
- Source code in brickflow/context/context.py -
@bind_variable(BrickflowBuiltInTaskVariables.task_key)
-def task_key(self, *, debug: Optional[str] = None) -> Any:
-    """
-    This function fetches the task_key value using the bind_variable decorator.
-    The implementation is intentionally empty because the decorator handles the logic.
-    """
-    pass
-
-
-
- -
- -
- - - -

-task_retry_count(*, debug: Optional[str] = None) -> Any - -

- - -
- -

This function fetches the task_retry_count value using the bind_variable decorator. -The implementation is intentionally empty because the decorator handles the logic.

- -
- Source code in brickflow/context/context.py -
@bind_variable(BrickflowBuiltInTaskVariables.task_retry_count)
-def task_retry_count(self, *, debug: Optional[str] = None) -> Any:
-    """
-    This function fetches the task_retry_count value using the bind_variable decorator.
-    The implementation is intentionally empty because the decorator handles the logic.
-    """
-    pass
-
-
-
- -
- - - -
- -
- -
- -
- - - -

- brickflow.context.context.ContextMode - - -

- - -
-

- Bases: Enum

- - - - - - -
- - - - - -

Attributes

- -
- - - -

-databricks = 'databricks' - - - class-attribute - instance-attribute - - -

- - -
-
- -
- -
- - - -

-not_databricks = 'not_databricks' - - - class-attribute - instance-attribute - - -

- - -
-
- -
- - - - - -
- -
- -
- -
- - - -

- brickflow.context.context.TaskComsObjectResult - - -

- - -
-

- Bases: Enum

- - - - - - -
- - - - - -

Attributes

- -
- - - -

-NO_RESULTS = 'NO_RESULTS' - - - class-attribute - instance-attribute - - -

- - -
-
- -
- - - - - -
- -
- -
-

Functions

- -
- - - -

-brickflow.context.context.bind_variable(builtin: BrickflowBuiltInTaskVariables) -> Callable - -

- - -
- -
- Source code in brickflow/context/context.py -
48
-49
-50
-51
-52
-53
-54
-55
-56
-57
-58
-59
-60
-61
def bind_variable(builtin: BrickflowBuiltInTaskVariables) -> Callable:
-    def wrapper(f: Callable) -> Callable:
-        @functools.wraps(f)
-        def func(*args, **kwargs):  # type: ignore
-            _self: Context = args[0]  # type: ignore
-            debug = kwargs["debug"]
-            f(*args, **kwargs)  # no-op
-            if _self.dbutils is not None:
-                return _self.get_parameter(builtin.value, debug)
-            return debug
-
-        return func
-
-    return wrapper
-
-
-
- -
- - - -
- -
- -
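Reading the wrapper above: the decorated method body is a no-op; on Databricks the value is resolved via get_parameter, and anywhere else the debug fallback is returned. A quick sketch of calling the decorated context methods, assuming ctx is the shared Context instance exported by brickflow:

from brickflow import ctx

# On a cluster this resolves the built-in task_key value via dbutils;
# locally (dbutils is None) it simply echoes the debug fallback.
print(ctx.task_key(debug="local_task"))
print(ctx.task_retry_count(debug="0"))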

diff --git a/dev/api/misc/index.html b/dev/api/misc/index.html
deleted file mode 100644
index 6e36af4b..00000000
--- a/dev/api/misc/index.html
+++ /dev/null
@@ -1,973 +0,0 @@

Misc - BrickFlow

Misc

This page is under maintenance.

diff --git a/dev/api/project/index.html b/dev/api/project/index.html
deleted file mode 100644
index d291ded8..00000000
--- a/dev/api/project/index.html
+++ /dev/null
@@ -1,2099 +0,0 @@

Project - BrickFlow

Classes

brickflow.engine.project.Project   (dataclass)

Attributes

batch: bool = True
bundle_base_path: Optional[str] = None
bundle_obj_name: Optional[str] = None
codegen_kwargs: Optional[Dict[str, Any]] = None
codegen_mechanism: Optional[Type[CodegenInterface]] = None
debug_execute_task: Optional[str] = None
debug_execute_workflow: Optional[str] = None
entry_point_path: Optional[str] = None
git_reference: Optional[str] = None
git_repo: Optional[str] = None
libraries: Optional[List[TaskLibrary]] = None
mode: Optional[str] = None
name: str
provider: Optional[str] = None
s3_backend: Optional[Dict[str, str]] = None

Functions

__enter__() -> _Project

Source code in brickflow/engine/project.py

def __enter__(self) -> "_Project":
    self._project = _Project(
        self.name,
        self.git_repo,
        self.provider,
        self.git_reference,
        self.s3_backend,
        self.entry_point_path,
        libraries=self.libraries or [],
        batch=self.batch,
        bundle_obj_name=self.bundle_obj_name,
        bundle_base_path=self.bundle_base_path,
    )
    return self._project

__exit__(exc_type, exc_val, exc_tb) -> None

Source code in brickflow/engine/project.py

def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # type: ignore
    if exc_type is not None:
        error_types = {Stage.deploy: DeployError, Stage.execute: ExecuteError}
        raise error_types[self._mode](
            f"Oops... failed during: {self._mode}"
        ) from exc_val

    if len(self._project.workflows) == 0:
        _ilog.info("Doing nothing; no workflows...")
        return

    if self._mode.value == Stage.deploy.value:
        _ilog.info("Deploying changes... to %s", ctx.env)
        if self.codegen_mechanism is None:
            raise ValueError(
                "codegen_mechanism cannot be None; please raise a github issue for this."
            )
        codegen = self.codegen_mechanism(
            project=self._project,
            id_=f"{ctx.env}_{self.name}",
            env=ctx.env,
            **(self.codegen_kwargs or {}),
        )
        codegen.synth()

    if self._mode.value == Stage.execute.value:
        wf_id = ctx.get_parameter(
            BrickflowInternalVariables.workflow_id.value,
            self.debug_execute_workflow,
        )
        t_id = ctx.get_parameter(
            BrickflowInternalVariables.task_id.value, self.debug_execute_task
        )

        if wf_id is None or t_id is None:
            _ilog.info(
                "No workflow id or task key was found; doing nothing..."
            )
            return

        workflow = self._project.get_workflow(wf_id)
        task = workflow.get_task(t_id)
        task.execute()
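A minimal end-to-end sketch of the context-manager flow above. Names and the repo URL are illustrative, and add_workflow is assumed to be the registration call exposed by the returned _Project, as in BrickFlow's examples:

from brickflow import Project, Workflow

wf = Workflow("demo_workflow")

@wf.task
def hello():
    print("hello world")

with Project(
    "demo_project",
    git_repo="https://github.com/org/repo",
    provider="github",
) as f:
    # __exit__ then deploys or executes depending on BRICKFLOW_MODE.
    f.add_workflow(wf)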

__post_init__() -> None

Source code in brickflow/engine/project.py

def __post_init__(self) -> None:
    self._mode = Stage[
        config(BrickflowEnvVars.BRICKFLOW_MODE.value, default=Stage.execute.value)
    ]
    self.entry_point_path = self.entry_point_path or get_caller_info()

    # setup current_project
    env_project_name = config(
        BrickflowEnvVars.BRICKFLOW_PROJECT_NAME.value, default=None
    )

    if (
        env_project_name is not None
        and self.name is not None
        and env_project_name != self.name
    ):
        raise ValueError(
            "Project name in config files and entrypoint must be the same"
        )

    ctx.set_current_project(self.name or env_project_name)  # always setup first

    # populate bundle info via env vars
    self.bundle_obj_name = config(
        BrickflowEnvVars.BRICKFLOW_BUNDLE_OBJ_NAME.value,
        default=".brickflow_bundles",
    )
    self.bundle_base_path = config(
        BrickflowEnvVars.BRICKFLOW_BUNDLE_BASE_PATH.value,
        default="/Users/${workspace.current_user.userName}",
    )

    self.git_reference = config(
        BrickflowEnvVars.BRICKFLOW_GIT_REF.value, default=self.get_git_ref()
    )

    if (
        self._mode == Stage.deploy
        and ctx.is_local() is False
        and self.git_reference is None
    ):
        raise ValueError(
            "git_reference must be set when deploying to non-local envs"
        )

    self.provider = config(
        BrickflowEnvVars.BRICKFLOW_GIT_PROVIDER.value, default=self.provider
    )
    self.git_repo = config(
        BrickflowEnvVars.BRICKFLOW_GIT_REPO.value, default=self.git_repo
    )
    if self.s3_backend is None:
        self.s3_backend = {
            "bucket": config("BRICKFLOW_S3_BACKEND_BUCKET", default=None),
            "key": config("BRICKFLOW_S3_BACKEND_KEY", default=None),
            "region": config("BRICKFLOW_S3_BACKEND_REGION", default=None),
            "dynamodb_table": config(
                "BRICKFLOW_S3_BACKEND_DYNAMODB_TABLE", default=None
            ),
        }
        if all(value is None for value in self.s3_backend.values()):
            self.s3_backend = None

    deployment_mode = config(
        BrickflowEnvVars.BRICKFLOW_DEPLOYMENT_MODE.value, default="cdktf"
    )
    if deployment_mode == BrickflowDeployMode.CDKTF.value:
        self.codegen_mechanism = HashicorpCDKTFGen
    elif deployment_mode == BrickflowDeployMode.BUNDLE.value:
        self.codegen_mechanism = DatabricksBundleCodegen
    if self.codegen_kwargs is None:
        self.codegen_kwargs = {}
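The env-driven behavior above can be exercised before the entrypoint runs; a sketch, assuming the BrickflowEnvVars enum values match their member names:

import os

os.environ["BRICKFLOW_MODE"] = "deploy"            # Stage.deploy -> codegen in __exit__
os.environ["BRICKFLOW_GIT_REF"] = "commit/abc123"  # required for non-local deploys
os.environ["BRICKFLOW_DEPLOYMENT_MODE"] = "bundle" # DatabricksBundleCodegen instead of CDKTF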

get_git_ref() -> Optional[str]

Source code in brickflow/engine/project.py

def get_git_ref(self) -> Optional[str]:
    if self._mode == Stage.deploy:
        if self.git_reference is not None:
            return self.git_reference
        else:
            try:
                return f"commit/{get_current_commit()}"
            except Exception:
                _ilog.warning(
                    "Unable to get current commit; defaulting to empty string"
                )
                return "commit/fake-local-stub" if ctx.is_local() else None
    else:
        return self.git_reference if self.git_reference is not None else ""

diff --git a/dev/api/secrets/index.html b/dev/api/secrets/index.html
deleted file mode 100644
index 97e8933a..00000000
--- a/dev/api/secrets/index.html
+++ /dev/null
@@ -1,1849 +0,0 @@

Secrets - BrickFlow

Attributes

brickflow_plugins.secrets.BRICKFLOW_SECRETS_BACKEND = 'brickflow_secrets_backend'   (module-attribute)
brickflow_plugins.secrets.brickflow_secrets_backend_plugin_impl = pluggy.HookimplMarker(BRICKFLOW_SECRETS_BACKEND)   (module-attribute)
brickflow_plugins.secrets.brickflow_secrets_plugin_spec = pluggy.HookspecMarker(BRICKFLOW_SECRETS_BACKEND)   (module-attribute)

Classes

brickflow_plugins.secrets.AbstractSecretsHelper

Bases: abc.ABC

Attributes

PROTOCOL_STARTS_WITH: Optional[Union[str, List[str]]] = None

Functions

get_secret_value_from_url(url_parsed_result: ParseResult)

Source code in brickflow_plugins/secrets/__init__.py

def get_secret_value_from_url(self, url_parsed_result: ParseResult):
    allowed_protocols = (
        [self.PROTOCOL_STARTS_WITH]
        if isinstance(self.PROTOCOL_STARTS_WITH, str)
        else self.PROTOCOL_STARTS_WITH
    )
    if self.PROTOCOL_STARTS_WITH is not None and not any(
        [
            url_parsed_result.scheme.lower().startswith(protocol)
            for protocol in allowed_protocols
        ]
    ):
        return None
    return self._get_secret_value_from_url(url_parsed_result)

brickflow_plugins.secrets.B64SecretsHelper

Bases: AbstractSecretsHelper

Attributes

PROTOCOL_STARTS_WITH = ['base64', 'b64']

brickflow_plugins.secrets.Base64BrickflowSecretPluginImpl

Bases: BrickflowSecretPluginSpec

Functions

get_secret_value(url_parsed_result: ParseResult) -> Optional['str']   (staticmethod)

Source code in brickflow_plugins/secrets/__init__.py

@staticmethod
@brickflow_secrets_backend_plugin_impl
def get_secret_value(url_parsed_result: ParseResult) -> Optional["str"]:
    return B64SecretsHelper().get_secret_value_from_url(url_parsed_result)

brickflow_plugins.secrets.BrickflowSecretPluginSpec

Functions

get_secret_value(url_parsed_result: ParseResult) -> Optional['str']   (staticmethod)

Custom execute method that is able to be plugged in.

Source code in brickflow_plugins/secrets/__init__.py

@staticmethod
@brickflow_secrets_plugin_spec(firstresult=True)
def get_secret_value(url_parsed_result: ParseResult) -> Optional["str"]:
    """Custom execute method that is able to be plugged in."""

brickflow_plugins.secrets.BrickflowSecretsBackend

Bases: BaseSecretsBackend

Functions

get_conn_value(conn_id: str) -> str | None

Source code in brickflow_plugins/secrets/__init__.py

def get_conn_value(self, conn_id: str) -> str | None:
    parsed_url = urlparse(conn_id)
    return get_brickflow_tasks_hook().get_secret_value(url_parsed_result=parsed_url)

set_backend_env()

Source code in brickflow_plugins/secrets/__init__.py

def set_backend_env(self):
    for k, v in self._get_secrets_backend_env().items():
        os.environ[k] = v

unset_backend_env()

Source code in brickflow_plugins/secrets/__init__.py

def unset_backend_env(self):
    for k in self._get_secrets_backend_env().keys():
        os.environ.pop(k, None)

brickflow_plugins.secrets.CerberusBrickflowSecretPluginImpl

Bases: BrickflowSecretPluginSpec

Functions

get_secret_value(url_parsed_result: ParseResult) -> Optional['str']   (staticmethod)

Source code in brickflow_plugins/secrets/__init__.py

@staticmethod
@brickflow_secrets_backend_plugin_impl
def get_secret_value(url_parsed_result: ParseResult) -> Optional["str"]:
    return CerberusSecretsHelper().get_secret_value_from_url(url_parsed_result)

brickflow_plugins.secrets.CerberusSecretsHelper

Bases: AbstractSecretsHelper

Attributes

PROTOCOL_STARTS_WITH = 'cerberus'

Functions

parse_path_and_key(path: Optional[str]) -> Optional[Tuple[str, str]]   (staticmethod)

Source code in brickflow_plugins/secrets/__init__.py

@staticmethod
def parse_path_and_key(path: Optional[str]) -> Optional[Tuple[str, str]]:
    if path is not None:
        _cleaned_path = path.lstrip("/").rstrip("/")
        return "/".join(_cleaned_path.split("/")[:-1]), _cleaned_path.split("/")[-1]
    return None
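The split above always treats the last path segment as the key and the rest as the safe-deposit-box path; for example (path illustrative):

path, key = CerberusSecretsHelper.parse_path_and_key("/app/my-sdb/creds/api_key")
# path == "app/my-sdb/creds", key == "api_key"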

brickflow_plugins.secrets.DatabricksSecretsBrickflowSecretPluginImpl

Bases: BrickflowSecretPluginSpec

Functions

get_secret_value(url_parsed_result: ParseResult) -> Optional['str']   (staticmethod)

Source code in brickflow_plugins/secrets/__init__.py

@staticmethod
@brickflow_secrets_backend_plugin_impl
def get_secret_value(url_parsed_result: ParseResult) -> Optional["str"]:
    # not implemented yet
    return None

Functions

brickflow_plugins.secrets.get_brickflow_tasks_hook() -> BrickflowSecretPluginSpec   (cached)

Source code in brickflow_plugins/secrets/__init__.py

@functools.lru_cache
def get_brickflow_tasks_hook() -> BrickflowSecretPluginSpec:
    pm = pluggy.PluginManager(BRICKFLOW_SECRETS_BACKEND)
    pm.add_hookspecs(BrickflowSecretPluginSpec)
    pm.load_setuptools_entrypoints(BRICKFLOW_SECRETS_BACKEND)
    pm.register(CerberusBrickflowSecretPluginImpl())
    pm.register(Base64BrickflowSecretPluginImpl())
    for name, plugin_instance in pm.list_name_plugin():
        log.info(
            "Loaded plugin with name: %s and class: %s",
            name,
            plugin_instance.__class__.__name__,
        )
    return pm.hook
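A resolution sketch tying the pieces together: get_conn_value parses the connection id as a URL and asks the firstresult hook chain for a value. Assuming the b64 helper base64-decodes the URL body, a base64-encoded connection could resolve like this:

from urllib.parse import urlparse

parsed = urlparse("b64://aGVsbG8td29ybGQ=")
# Dispatches to Base64BrickflowSecretPluginImpl because the scheme starts with "b64".
value = get_brickflow_tasks_hook().get_secret_value(url_parsed_result=parsed)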

diff --git a/dev/api/task/index.html b/dev/api/task/index.html
deleted file mode 100644
index 7b437850..00000000
--- a/dev/api/task/index.html
+++ /dev/null
@@ -1,4825 +0,0 @@

Task - BrickFlow

Classes

brickflow.engine.task.Task   (dataclass)

Attributes
brickflow_default_params: Dict[str, str]   (property)
builtin_notebook_params: Dict[str, str]   (property)
cluster: Cluster
custom_execute_callback: Optional[Callable] = None
custom_task_parameters: Dict[str, str]   (property)
databricks_task_type_str: str   (property)
depends_on: List[Union[Callable, str]] = field(default_factory=lambda: [])
depends_on_names: Iterator[str]   (property)
description: Optional[str] = None
libraries: List[TaskLibrary] = field(default_factory=lambda: [])
name: str   (property)
parents: List[str]   (property)
task_func: Callable
task_func_name: str   (property)
task_id: str
task_settings: Optional[TaskSettings] = None
task_type: TaskType = TaskType.BRICKFLOW_TASK
trigger_rule: BrickflowTriggerRule = BrickflowTriggerRule.ALL_SUCCESS
workflow: Workflow

Functions
execute() -> Any

Source code in brickflow/engine/task.py

@with_brickflow_logger
def execute(self) -> Any:
    # Workflow is:
    #   1. Check whether tasks were selected and, if so, whether this task is in the list
    #   2. Check whether the previous task was skipped and evaluate the trigger rule
    #   3. Check whether this is a custom python task and execute it
    #   4. Execute the task function
    ctx._set_current_task(self.name)
    _select_task_skip, _select_task_skip_reason = self._skip_because_not_selected()
    if _select_task_skip is True:
        # check if this task is skipped due to task selection
        _ilog.info(
            "Skipping task... %s for reason: %s",
            self.name,
            _select_task_skip_reason,
        )
        ctx._reset_current_task()
        return
    _skip, reason = self.should_skip()
    if _skip is True:
        _ilog.info("Skipping task... %s for reason: %s", self.name, reason)
        ctx.task_coms.put(self.name, BRANCH_SKIP_EXCEPT, SKIP_EXCEPT_HACK)
        ctx._reset_current_task()
        return

    initial_resp: TaskResponse = get_brickflow_tasks_hook().task_execute(
        task=self, workflow=self.workflow
    )
    resp: TaskResponse = get_brickflow_tasks_hook().handle_results(
        resp=initial_resp, task=self, workflow=self.workflow
    )

    if resp.push_return_value is True:
        ctx.task_coms.put(self.name, RETURN_VALUE_KEY, resp.response)
    ctx._reset_current_task()
    return resp.response

get_obj_dict(entrypoint: str) -> Dict[str, Any]

Source code in brickflow/engine/task.py

def get_obj_dict(self, entrypoint: str) -> Dict[str, Any]:
    return {
        "notebook_path": self.handle_notebook_path(entrypoint),
        "base_parameters": {
            **self.builtin_notebook_params,
            **self.brickflow_default_params,
            **self.custom_task_parameters,  # type: ignore
            # **(self.custom_unique_task_parameters or {}),
            # TODO: implement only after validating limit on parameters
        },
    }

get_runtime_parameter_values() -> Dict[str, Any]

Source code in brickflow/engine/task.py

def get_runtime_parameter_values(self) -> Dict[str, Any]:
    # if dbutils returns None then return v instead
    return {
        k: (ctx.get_parameter(k, str(v)) or v)
        for k, v in (
            inspect.getfullargspec(self.task_func).kwonlydefaults or {}
        ).items()
    }

handle_notebook_path(entrypoint: str) -> str   (staticmethod)

Source code in brickflow/engine/task.py

@staticmethod
def handle_notebook_path(entrypoint: str) -> str:
    # local will get created as workspace notebook job and not a git source job
    if ctx.env == BrickflowDefaultEnvs.LOCAL.value:
        # check and ensure suffix has .py extension
        return entrypoint if entrypoint.endswith(".py") else f"{entrypoint}.py"
    return entrypoint

is_valid_task_signature() -> None

Source code in brickflow/engine/task.py

def is_valid_task_signature(self) -> None:
    # only supports kwonlyargs with defaults
    spec: inspect.FullArgSpec = inspect.getfullargspec(self.task_func)
    sig: inspect.Signature = inspect.signature(self.task_func)
    signature_error_msg = (
        "Task signatures only support kwargs with defaults, or catch-all varkw **kwargs. "
        "For example def execute(*, variable_a=None, variable_b=None, **kwargs). "
        f"Please fix function def {self.task_func_name}{sig}: ..."
    )
    kwargs_default_error_msg = (
        f"Keyword arguments must be Strings. "
        f"Please handle booleans and numbers via strings. "
        f"Please fix function def {self.task_func_name}{sig}: ..."
    )

    valid_case = spec.args == [] and spec.varargs is None and spec.defaults is None
    for _, v in (spec.kwonlydefaults or {}).items():
        # in python boolean is a type of int; must be captured here via short circuit
        if not (isinstance(v, str) or v is None):
            raise InvalidTaskSignatureDefinition(kwargs_default_error_msg)
    if valid_case:
        return

    raise InvalidTaskSignatureDefinition(signature_error_msg)
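Per the checks above, a compliant task function takes only keyword-only arguments with string (or None) defaults, optionally plus a catch-all **kwargs; a sketch, with wf assumed to be a Workflow as in the Workflow page later in this diff:

@wf.task
def my_task(*, run_mode="full", limit=None, **kwargs):
    # numbers and booleans must be passed as strings, e.g. limit="100"
    return run_mode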

should_skip() -> Tuple[bool, Optional[str]]

Source code in brickflow/engine/task.py

def should_skip(self) -> Tuple[bool, Optional[str]]:
    # return true or false and reason
    node_skip_checks = []
    for parent in self.parents:
        if parent != ROOT_NODE:
            try:
                task_to_not_skip = ctx.task_coms.get(parent, BRANCH_SKIP_EXCEPT)
                if self.name != task_to_not_skip:
                    # set this task to skip; hack to keep empty to trigger failure
                    # key look up will fail
                    node_skip_checks.append(True)
                else:
                    node_skip_checks.append(False)
            except Exception:
                # ignore errors as the key probably doesn't exist
                # TODO: log errors
                node_skip_checks.append(False)
    if not node_skip_checks:
        return False, None
    if self.trigger_rule == BrickflowTriggerRule.NONE_FAILED:
        # by default a task failure automatically skips
        return self._get_skip_with_reason(
            all(node_skip_checks),
            "At least one task before this was not successful",
        )
    # default is BrickflowTriggerRule.ALL_SUCCESS
    return self._get_skip_with_reason(
        any(node_skip_checks), "All tasks before this were not successful"
    )

brickflow.engine.task.EmailNotifications   (dataclass)

Attributes

on_failure: Optional[List[str]] = None
on_start: Optional[List[str]] = None
on_success: Optional[List[str]] = None

Functions

to_tf_dict() -> Dict[str, Optional[List[str]]]

Source code in brickflow/engine/task.py

def to_tf_dict(self) -> Dict[str, Optional[List[str]]]:
    return {
        "on_start": self.on_start,
        "on_failure": self.on_failure,
        "on_success": self.on_success,
    }

brickflow.engine.task.JarTaskLibrary   (dataclass)

Bases: StorageBasedTaskLibrary

Parameters:
    jar (str): String to s3/dbfs path for jar. Required.

Attributes: jar: str

brickflow.engine.task.EggTaskLibrary   (dataclass)

Bases: StorageBasedTaskLibrary

Parameters:
    egg (str): String to s3/dbfs path for egg. Required.

Attributes: egg: str

brickflow.engine.task.WheelTaskLibrary   (dataclass)

Bases: StorageBasedTaskLibrary

Parameters:
    whl (str): String to s3/dbfs path for whl. Required.

Attributes: whl: str

brickflow.engine.task.PypiTaskLibrary   (dataclass)

Bases: TaskLibrary

Parameters:
    package (str): The package in pypi, i.e. requests, requests==x.y.z, git+https://github.com/Nike-Inc/brickflow.git. Required.
    repo (Optional[str]): The repository where the package can be found. By default pypi is used. Default: None.

Attributes: dict: Dict[str, Union[str, Dict[str, str]]] (property); package: str; repo: Optional[str] = None

brickflow.engine.task.MavenTaskLibrary   (dataclass)

Bases: TaskLibrary

Parameters:
    coordinates (str): Gradle-style Maven coordinates. For example: org.jsoup:jsoup:1.7.2. Required.
    repo (Optional[str]): Maven repo to install the Maven package from. If omitted, both Maven Central Repository and Spark Packages are searched. Default: None.
    exclusions (Optional[List[str]]): List of dependencies to exclude. For example: ["slf4j:slf4j", "*:hadoop-client"]. See Maven dependency exclusions: https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. Default: None.

Attributes: coordinates: str; dict (property); exclusions: Optional[List[str]] = None; repo: Optional[str] = None

brickflow.engine.task.CranTaskLibrary   (dataclass)

Bases: TaskLibrary

Parameters:
    package (str): The name of the CRAN package to install. Required.
    repo (Optional[str]): The repository where the package can be found. If not specified, the default CRAN repo is used. Default: None.

Attributes: dict (property); package: str; repo: Optional[str] = None
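A sketch of combining these library types into a libraries list for a task or workflow; paths and versions are illustrative:

from brickflow.engine.task import (
    JarTaskLibrary,
    MavenTaskLibrary,
    PypiTaskLibrary,
)

libraries = [
    PypiTaskLibrary(package="requests==2.31.0"),
    MavenTaskLibrary(coordinates="org.jsoup:jsoup:1.7.2"),
    JarTaskLibrary(jar="dbfs:/libs/my-lib.jar"),
]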

brickflow.engine.task.BrickflowTriggerRule

Bases: Enum

Attributes

ALL_SUCCESS = 'all_success'
NONE_FAILED = 'none_failed'

brickflow.engine.task.BrickflowTaskEnvVars

Bases: Enum

Attributes

BRICKFLOW_SELECT_TASKS = 'BRICKFLOW_SELECT_TASKS'

brickflow.engine.task.TaskSettings   (dataclass)

Attributes

email_notifications: Optional[EmailNotifications] = None
max_retries: Optional[int] = None
min_retry_interval_millis: Optional[int] = None
notification_settings: Optional[TaskNotificationSettings] = None
retry_on_timeout: Optional[bool] = None
timeout_seconds: Optional[int] = None

Functions

merge(other: Optional['TaskSettings']) -> 'TaskSettings'

Source code in brickflow/engine/task.py

def merge(self, other: Optional["TaskSettings"]) -> "TaskSettings":
    # overrides top level values
    if other is None:
        return self
    return TaskSettings(
        other.email_notifications or self.email_notifications,
        other.notification_settings or self.notification_settings,
        other.timeout_seconds or self.timeout_seconds or 0,
        other.max_retries or self.max_retries,
        other.min_retry_interval_millis or self.min_retry_interval_millis,
        other.retry_on_timeout or self.retry_on_timeout,
    )
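merge overlays the other settings on top of self field by field; a sketch:

defaults = TaskSettings(max_retries=2, timeout_seconds=600)
override = TaskSettings(max_retries=5)
merged = defaults.merge(override)
# merged.max_retries == 5; merged.timeout_seconds == 600 (kept from defaults)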

to_tf_dict() -> Dict[str, Optional[str] | Optional[int] | Optional[bool] | Optional[Dict[str, Optional[List[str]]]]]

Source code in brickflow/engine/task.py

def to_tf_dict(
    self,
) -> Dict[
    str,
    Optional[str]
    | Optional[int]
    | Optional[bool]
    | Optional[Dict[str, Optional[List[str]]]],
]:
    email_not = (
        self.email_notifications.to_tf_dict()
        if self.email_notifications is not None
        else {}
    )
    notification_settings = (
        {}
        if self.notification_settings is None
        else {"notification_settings": self.notification_settings.dict()}
    )
    return {
        **notification_settings,
        "email_notifications": email_not,
        "timeout_seconds": self.timeout_seconds,
        "max_retries": self.max_retries,
        "min_retry_interval_millis": self.min_retry_interval_millis,
        "retry_on_timeout": self.retry_on_timeout,
    }

brickflow.engine.task.TaskType

Bases: Enum

Attributes

BRICKFLOW_TASK = 'brickflow_task'
CUSTOM_PYTHON_TASK = 'custom_python_task'
DLT = 'pipeline_task'
NOTEBOOK_TASK = 'notebook_task'
SQL = 'sql_task'

diff --git a/dev/api/workflow/index.html b/dev/api/workflow/index.html
deleted file mode 100644
index 8d4310d4..00000000
--- a/dev/api/workflow/index.html
+++ /dev/null
@@ -1,3108 +0,0 @@

Workflow - BrickFlow

Classes

brickflow.engine.workflow.Workflow   (dataclass)

Attributes

active_task: Optional[str] = None
bfs_layers: List[str]   (property)
clusters: List[Cluster] = field(default_factory=lambda: [])
common_task_parameters: Optional[Dict[str, str]] = None
default_cluster: Optional[Cluster] = None
default_task_settings: TaskSettings = TaskSettings()
email_notifications: Optional[WorkflowEmailNotifications] = None
graph: nx.DiGraph = field(default_factory=nx.DiGraph)
libraries: List[TaskLibrary] = field(default_factory=lambda: [])
max_concurrent_runs: int = 1
max_tasks_in_workflow: int = 100
name: str   (property)
notification_settings: Optional[WorkflowNotificationSettings] = None
permissions: WorkflowPermissions = WorkflowPermissions()
prefix: str = field(default_factory=lambda: config('BRICKFLOW_WORKFLOW_PREFIX', ''))
run_as_service_principal: Optional[str] = None
run_as_user: Optional[str] = None
schedule_quartz_expression: Optional[str] = None
suffix: str = field(default_factory=lambda: config('BRICKFLOW_WORKFLOW_SUFFIX', ''))
tags: Optional[Dict[str, str]] = None
tasks: Dict[str, Task] = field(default_factory=lambda: {})
timezone: str = 'UTC'
trigger: Optional[Trigger] = None
unique_new_clusters: List[Cluster]   (property)
webhook_notifications: Optional[WorkflowWebhookNotifications] = None

Functions

bfs_task_iter() -> Iterator[Task]

Source code in brickflow/engine/workflow.py

def bfs_task_iter(self) -> Iterator[Task]:
    for layer in self.bfs_layers:
        for task_key in layer:
            yield self.get_task(task_key)

check_no_active_task() -> None

Source code in brickflow/engine/workflow.py

def check_no_active_task(self) -> None:
    if self.active_task is not None:
        raise AnotherActiveTaskError(
            "You are calling another active task in another task. "
            "Please abstract the code more."
        )

dlt_task(task_func: Optional[Callable] = None, name: Optional[str] = None, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None) -> Callable

Source code in brickflow/engine/workflow.py

def dlt_task(
    self,
    task_func: Optional[Callable] = None,
    name: Optional[str] = None,
    depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None,
) -> Callable:
    return self.task(task_func, name, task_type=TaskType.DLT, depends_on=depends_on)

get_task(task_id: str) -> Task

Source code in brickflow/engine/workflow.py

@wraps_keyerror(TaskNotFoundError, "Unable to find task: ")
def get_task(self, task_id: str) -> Task:
    return self.tasks[task_id]

notebook_task(task_func: Optional[Callable] = None, name: Optional[str] = None, cluster: Optional[Cluster] = None, libraries: Optional[List[TaskLibrary]] = None, task_settings: Optional[TaskSettings] = None, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None) -> Callable

Source code in brickflow/engine/workflow.py

def notebook_task(
    self,
    task_func: Optional[Callable] = None,
    name: Optional[str] = None,
    cluster: Optional[Cluster] = None,
    libraries: Optional[List[TaskLibrary]] = None,
    task_settings: Optional[TaskSettings] = None,
    depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None,
) -> Callable:
    return self.task(
        task_func,
        name,
        cluster=cluster,
        libraries=libraries,
        task_type=TaskType.NOTEBOOK_TASK,
        task_settings=task_settings,
        depends_on=depends_on,
    )

parents(node: str) -> Iterator

Source code in brickflow/engine/workflow.py

def parents(self, node: str) -> Iterator:
    return self.graph.predecessors(node)

pop_task(task_id: str) -> None

Source code in brickflow/engine/workflow.py

@wraps_keyerror(TaskNotFoundError, "Unable to find task: ")
def pop_task(self, task_id: str) -> None:
    # Pop from dict and graph
    self.tasks.pop(task_id)
    self.graph.remove_node(task_id)

task(task_func: Optional[Callable] = None, name: Optional[str] = None, cluster: Optional[Cluster] = None, libraries: Optional[List[TaskLibrary]] = None, task_type: TaskType = TaskType.BRICKFLOW_TASK, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None, trigger_rule: BrickflowTriggerRule = BrickflowTriggerRule.ALL_SUCCESS, custom_execute_callback: Optional[Callable] = None, task_settings: Optional[TaskSettings] = None) -> Callable

Source code in brickflow/engine/workflow.py

def task(
    self,
    task_func: Optional[Callable] = None,
    name: Optional[str] = None,
    cluster: Optional[Cluster] = None,
    libraries: Optional[List[TaskLibrary]] = None,
    task_type: TaskType = TaskType.BRICKFLOW_TASK,
    depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None,
    trigger_rule: BrickflowTriggerRule = BrickflowTriggerRule.ALL_SUCCESS,
    custom_execute_callback: Optional[Callable] = None,
    task_settings: Optional[TaskSettings] = None,
) -> Callable:
    if len(self.tasks) >= self.max_tasks_in_workflow:
        raise ValueError(
            "You have reached the maximum number of tasks allowed in a databricks workflow. "
            "Please split your workflow into multiple workflows or raise a feature request "
            "with your Databricks team."
        )

    def task_wrapper(f: Callable) -> Callable:
        task_id = name or f.__name__

        self._add_task(
            f,
            task_id,
            cluster=cluster,
            task_type=task_type,
            libraries=libraries,
            depends_on=depends_on,
            trigger_rule=trigger_rule,
            custom_execute_callback=custom_execute_callback,
            task_settings=task_settings,
        )

        @functools.wraps(f)
        def func(*args, **kwargs):  # type: ignore
            try:
                self.check_no_active_task()
                self._set_active_task(task_id)
                resp = f(*args, **kwargs)
                return resp
            except Exception as e:
                self._reset_active_task()
                raise e
            finally:
                self._reset_active_task()

        return func

    if task_func is not None:
        if callable(task_func):
            return task_wrapper(task_func)
        else:
            raise NoCallableTaskError(
                "Please use task decorator against a callable function."
            )

    return task_wrapper
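Putting the decorator above to use; the workflow name and quartz schedule are illustrative:

wf = Workflow("etl_workflow", schedule_quartz_expression="0 0 6 * * ?")

@wf.task
def extract():
    return "raw"

@wf.task(depends_on=extract)
def transform():
    print("runs only after extract")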

task_exists(task_id: str) -> bool

Source code in brickflow/engine/workflow.py

def task_exists(self, task_id: str) -> bool:
    return task_id in self.tasks

task_iter() -> Iterator[Task]

Source code in brickflow/engine/workflow.py

def task_iter(self) -> Iterator[Task]:
    for task in self.bfs_task_iter():
        yield task

unique_new_clusters_dict() -> List[Dict[str, Any]]

Source code in brickflow/engine/workflow.py

def unique_new_clusters_dict(self) -> List[Dict[str, Any]]:
    self.validate_new_clusters_with_unique_names()
    all_unique_clusters = self.unique_new_clusters
    return [
        # job clusters do not need names
        {
            "job_cluster_key": c.name,
            "new_cluster": c.as_dict(remove_fields=["name"]),
        }
        for c in all_unique_clusters
    ]

validate_new_clusters_with_unique_names() -> None

Source code in brickflow/engine/workflow.py

def validate_new_clusters_with_unique_names(self) -> None:
    all_unique_clusters = self.unique_new_clusters
    unique_name_list: Dict[str, Optional[str]] = {}
    duplicates = []
    for cluster in all_unique_clusters:
        if cluster.name not in unique_name_list:
            unique_name_list[cluster.name] = None
        else:
            duplicates.append(cluster.name)

    duplicate_list = list(set(duplicates))
    if len(duplicate_list) > 0:
        raise DuplicateClustersDefinitionError(
            f"Found duplicate cluster definitions in your workflow: {self.name}, "
            f"with names: {duplicate_list}"
        )

brickflow.engine.workflow.User

Bases: ScimEntity

Functions

to_access_control() -> Dict[str, str]

Source code in brickflow/engine/workflow.py

def to_access_control(self) -> Dict[str, str]:
    return {"user_name": self.name}

brickflow.engine.workflow.Group

Bases: ScimEntity

Functions

to_access_control() -> Dict[str, str]

Source code in brickflow/engine/workflow.py

def to_access_control(self) -> Dict[str, str]:
    return {"group_name": self.name}

brickflow.engine.workflow.ServicePrincipal

Bases: ScimEntity

Functions

to_access_control() -> Dict[str, str]

Source code in brickflow/engine/workflow.py

def to_access_control(self) -> Dict[str, str]:
    return {"service_principal_name": self.name}

diff --git a/dev/api/workflow_dependency_sensor/index.html b/dev/api/workflow_dependency_sensor/index.html
deleted file mode 100644
index 457973d1..00000000
--- a/dev/api/workflow_dependency_sensor/index.html
+++ /dev/null
@@ -1,1632 +0,0 @@

WorkflowDependencySensor - BrickFlow

Classes

brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor(databricks_host: str, databricks_secrets_scope: str, databricks_secrets_key: str, dependency_job_id: int, delta: timedelta, timeout_seconds: int, poke_interval_seconds: int = 60)

This is used to have a dependency on a Databricks workflow.

Example usage in your brickflow task:

WorkflowDependencySensor(
    databricks_host="https://your_workspace_url.cloud.databricks.com",
    databricks_secrets_scope="brickflow-demo-tobedeleted",
    databricks_secrets_key="service_principle_id",
    dependency_job_id=job_id,
    poke_interval_seconds=20,
    timeout_seconds=60,
    delta=timedelta(days=1),
)

Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py

def __init__(
    self,
    databricks_host: str,
    databricks_secrets_scope: str,
    databricks_secrets_key: str,
    dependency_job_id: int,
    delta: timedelta,
    timeout_seconds: int,
    poke_interval_seconds: int = 60,
):
    self.databricks_host = databricks_host
    self.dependency_job_id = dependency_job_id
    self.databricks_secrets_scope = databricks_secrets_scope
    self.databricks_secrets_key = databricks_secrets_key
    self.poke_interval = poke_interval_seconds
    self.timeout = timeout_seconds
    self.delta = delta
    self.log = logging
    self.start_time = time.time()

Attributes

databricks_host = databricks_host
databricks_secrets_key = databricks_secrets_key
databricks_secrets_scope = databricks_secrets_scope
delta = delta
dependency_job_id = dependency_job_id
log = logging
poke_interval = poke_interval_seconds
start_time = time.time()
timeout = timeout_seconds

Functions

execute()

Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py

def execute(self):
    session = self.get_http_session()
    url = f"{self.databricks_host.rstrip('/')}/api/2.0/jobs/runs/list"
    headers = {
        "Authorization": f"Bearer {self.get_token()}",
        "Content-Type": "application/json",
    }
    # http://www.unixtimestampconverter.com/
    params = {
        "limit": 25,
        "job_id": self.dependency_job_id,
        "expand_tasks": "true",
        "start_time_from": self.get_the_execution_date(),
    }

    while True:
        offset = 0
        has_more = True
        while has_more is True:
            params["offset"] = offset
            resp = session.get(url, params=params, headers=headers).json()
            for run in resp.get("runs", []):
                self.log.info(
                    f"Found the run_id: {run['run_id']}, and its result_state is: {run.get('state', {}).get('result_state', None)}"
                )
                if run.get("state", {}).get("result_state", None) == "SUCCESS":
                    self.log.info(f"Found a successful run: {run['run_id']}")
                    return

            offset += params["limit"]
            has_more = resp.get("has_more", False)
            self.log.info(f"This is offset: {offset}, this is has_more: {has_more}")

        self.log.info("Didn't find a successful run yet")
        if (
            self.timeout is not None
            and (time.time() - self.start_time) > self.timeout
        ):
            raise WorkflowDependencySensorTimeOutException("The job has timed out")

        self.log.info(f"sleeping for: {self.poke_interval}")
        time.sleep(self.poke_interval)
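A sketch of wiring the sensor into a task; the job id, scope, and key names are illustrative, and the import assumes the class is exported from brickflow_plugins:

from datetime import timedelta
from brickflow_plugins import WorkflowDependencySensor

@wf.task
def wait_for_upstream():
    sensor = WorkflowDependencySensor(
        databricks_host="https://your_workspace_url.cloud.databricks.com",
        databricks_secrets_scope="my-scope",
        databricks_secrets_key="token-key",
        dependency_job_id=123456,
        delta=timedelta(days=1),
        timeout_seconds=3600,
        poke_interval_seconds=60,
    )
    sensor.execute()  # blocks until a successful upstream run or timeout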

get_http_session()   (cached)

Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py

@functools.lru_cache(maxsize=None)
def get_http_session(self):
    session = requests.Session()
    max_retries = int(os.getenv("DATABRICKS_REQUEST_RETRY_COUNT", 10))
    retries = self.get_retry_class(max_retries)(
        total=max_retries,
        backoff_factor=1,
        status_forcelist=[500, 501, 502, 503, 504, 429],
    )
    session.mount("https://", HTTPAdapter(max_retries=retries))
    session.mount("http://", HTTPAdapter(max_retries=retries))
    return session

get_retry_class(max_retries)

Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py

def get_retry_class(self, max_retries):
    from urllib3 import Retry

    log = self.log

    class LogRetry(Retry):
        """
        Adding extra logs before making a retry request
        """

        def __init__(self, *args, **kwargs):
            if (
                kwargs.get("total", None) != max_retries
                and kwargs.get("total", None) > 0
            ):
                log.info(f"Retrying with kwargs: {kwargs}")
            super().__init__(*args, **kwargs)

    return LogRetry

get_the_execution_date() -> str

Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py

def get_the_execution_date(self) -> str:
    session = self.get_http_session()
    url = f"{self.databricks_host.rstrip('/')}/api/2.0/jobs/runs/get"
    headers = {
        "Authorization": f"Bearer {self.get_token()}",
        "Content-Type": "application/json",
    }
    run_id = ctx.dbutils_widget_get_or_else("brickflow_parent_run_id", None)
    if run_id is None:
        raise WorkflowDependencySensorException(
            "run_id is empty, brickflow_parent_run_id parameter is not found "
            "or no value present"
        )
    params = {"run_id": run_id}
    resp = session.get(url, params=params, headers=headers).json()

    # Convert Unix timestamp to datetime object
    start_time = datetime.fromtimestamp(resp["start_time"] / 1000)
    execution_date = start_time - self.delta
    self.log.info(start_time)
    self.log.info(execution_date)
    self.log.info(execution_date.strftime("%s"))
    return execution_date.strftime("%s")

get_token()   (cached)

Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py

@functools.lru_cache
def get_token(self):
    return ctx.dbutils.secrets.get(
        self.databricks_secrets_scope, self.databricks_secrets_key
    )

brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensorException

Bases: Exception

brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensorTimeOutException

Bases: TimeoutError

diff --git a/dev/assets/_mkdocstrings.css b/dev/assets/_mkdocstrings.css
deleted file mode 100644
index 049a254b..00000000
--- a/dev/assets/_mkdocstrings.css
+++ /dev/null
diff --git a/dev/assets/images/favicon.png b/dev/assets/images/favicon.png
deleted file mode 100644
index 1cf13b9f..00000000
Binary files a/dev/assets/images/favicon.png and /dev/null differ
diff --git a/dev/assets/javascripts/bundle.c2be25ad.min.js b/dev/assets/javascripts/bundle.c2be25ad.min.js
deleted file mode 100644
index f32333ee..00000000
--- a/dev/assets/javascripts/bundle.c2be25ad.min.js
+++ /dev/null
u(O){O.hasAttribute("data-focus-visible-added")&&(O.classList.remove("focus-visible"),O.removeAttribute("data-focus-visible-added"))}function p(O){O.metaKey||O.altKey||O.ctrlKey||(a(r.activeElement)&&c(r.activeElement),n=!0)}function m(O){n=!1}function d(O){a(O.target)&&(n||f(O.target))&&c(O.target)}function h(O){a(O.target)&&(O.target.classList.contains("focus-visible")||O.target.hasAttribute("data-focus-visible-added"))&&(o=!0,window.clearTimeout(i),i=window.setTimeout(function(){o=!1},100),u(O.target))}function v(O){document.visibilityState==="hidden"&&(o&&(n=!0),Y())}function Y(){document.addEventListener("mousemove",N),document.addEventListener("mousedown",N),document.addEventListener("mouseup",N),document.addEventListener("pointermove",N),document.addEventListener("pointerdown",N),document.addEventListener("pointerup",N),document.addEventListener("touchmove",N),document.addEventListener("touchstart",N),document.addEventListener("touchend",N)}function B(){document.removeEventListener("mousemove",N),document.removeEventListener("mousedown",N),document.removeEventListener("mouseup",N),document.removeEventListener("pointermove",N),document.removeEventListener("pointerdown",N),document.removeEventListener("pointerup",N),document.removeEventListener("touchmove",N),document.removeEventListener("touchstart",N),document.removeEventListener("touchend",N)}function N(O){O.target.nodeName&&O.target.nodeName.toLowerCase()==="html"||(n=!1,B())}document.addEventListener("keydown",p,!0),document.addEventListener("mousedown",m,!0),document.addEventListener("pointerdown",m,!0),document.addEventListener("touchstart",m,!0),document.addEventListener("visibilitychange",v,!0),Y(),r.addEventListener("focus",d,!0),r.addEventListener("blur",h,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof document!="undefined"&&e(document)})});var cn=Pt(Er=>{(function(e){var t=function(){try{return!!Symbol.iterator}catch(c){return!1}},r=t(),n=function(c){var u={next:function(){var p=c.shift();return{done:p===void 0,value:p}}};return r&&(u[Symbol.iterator]=function(){return u}),u},o=function(c){return encodeURIComponent(c).replace(/%20/g,"+")},i=function(c){return decodeURIComponent(String(c).replace(/\+/g," "))},s=function(){var c=function(p){Object.defineProperty(this,"_entries",{writable:!0,value:{}});var m=typeof p;if(m!=="undefined")if(m==="string")p!==""&&this._fromString(p);else if(p instanceof c){var d=this;p.forEach(function(B,N){d.append(N,B)})}else if(p!==null&&m==="object")if(Object.prototype.toString.call(p)==="[object Array]")for(var h=0;hd[0]?1:0}),c._entries&&(c._entries={});for(var p=0;p1?i(d[1]):"")}})})(typeof global!="undefined"?global:typeof window!="undefined"?window:typeof self!="undefined"?self:Er);(function(e){var t=function(){try{var o=new e.URL("b","http://a");return o.pathname="c d",o.href==="http://a/c%20d"&&o.searchParams}catch(i){return!1}},r=function(){var o=e.URL,i=function(f,c){typeof f!="string"&&(f=String(f)),c&&typeof c!="string"&&(c=String(c));var u=document,p;if(c&&(e.location===void 
0||c!==e.location.href)){c=c.toLowerCase(),u=document.implementation.createHTMLDocument(""),p=u.createElement("base"),p.href=c,u.head.appendChild(p);try{if(p.href.indexOf(c)!==0)throw new Error(p.href)}catch(O){throw new Error("URL unable to set base "+c+" due to "+O)}}var m=u.createElement("a");m.href=f,p&&(u.body.appendChild(m),m.href=m.href);var d=u.createElement("input");if(d.type="url",d.value=f,m.protocol===":"||!/:/.test(m.href)||!d.checkValidity()&&!c)throw new TypeError("Invalid URL");Object.defineProperty(this,"_anchorElement",{value:m});var h=new e.URLSearchParams(this.search),v=!0,Y=!0,B=this;["append","delete","set"].forEach(function(O){var Qe=h[O];h[O]=function(){Qe.apply(h,arguments),v&&(Y=!1,B.search=h.toString(),Y=!0)}}),Object.defineProperty(this,"searchParams",{value:h,enumerable:!0});var N=void 0;Object.defineProperty(this,"_updateSearchParams",{enumerable:!1,configurable:!1,writable:!1,value:function(){this.search!==N&&(N=this.search,Y&&(v=!1,this.searchParams._fromString(this.search),v=!0))}})},s=i.prototype,a=function(f){Object.defineProperty(s,f,{get:function(){return this._anchorElement[f]},set:function(c){this._anchorElement[f]=c},enumerable:!0})};["hash","host","hostname","port","protocol"].forEach(function(f){a(f)}),Object.defineProperty(s,"search",{get:function(){return this._anchorElement.search},set:function(f){this._anchorElement.search=f,this._updateSearchParams()},enumerable:!0}),Object.defineProperties(s,{toString:{get:function(){var f=this;return function(){return f.href}}},href:{get:function(){return this._anchorElement.href.replace(/\?$/,"")},set:function(f){this._anchorElement.href=f,this._updateSearchParams()},enumerable:!0},pathname:{get:function(){return this._anchorElement.pathname.replace(/(^\/?)/,"/")},set:function(f){this._anchorElement.pathname=f},enumerable:!0},origin:{get:function(){var f={"http:":80,"https:":443,"ftp:":21}[this._anchorElement.protocol],c=this._anchorElement.port!=f&&this._anchorElement.port!=="";return this._anchorElement.protocol+"//"+this._anchorElement.hostname+(c?":"+this._anchorElement.port:"")},enumerable:!0},password:{get:function(){return""},set:function(f){},enumerable:!0},username:{get:function(){return""},set:function(f){},enumerable:!0}}),i.createObjectURL=function(f){return o.createObjectURL.apply(o,arguments)},i.revokeObjectURL=function(f){return o.revokeObjectURL.apply(o,arguments)},e.URL=i};if(t()||r(),e.location!==void 0&&!("origin"in e.location)){var n=function(){return e.location.protocol+"//"+e.location.hostname+(e.location.port?":"+e.location.port:"")};try{Object.defineProperty(e.location,"origin",{get:n,enumerable:!0})}catch(o){setInterval(function(){e.location.origin=n()},100)}}})(typeof global!="undefined"?global:typeof window!="undefined"?window:typeof self!="undefined"?self:Er)});var qr=Pt((Mt,Nr)=>{/*! 
- * clipboard.js v2.0.11 - * https://clipboardjs.com/ - * - * Licensed MIT © Zeno Rocha - */(function(t,r){typeof Mt=="object"&&typeof Nr=="object"?Nr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof Mt=="object"?Mt.ClipboardJS=r():t.ClipboardJS=r()})(Mt,function(){return function(){var e={686:function(n,o,i){"use strict";i.d(o,{default:function(){return Ai}});var s=i(279),a=i.n(s),f=i(370),c=i.n(f),u=i(817),p=i.n(u);function m(j){try{return document.execCommand(j)}catch(T){return!1}}var d=function(T){var E=p()(T);return m("cut"),E},h=d;function v(j){var T=document.documentElement.getAttribute("dir")==="rtl",E=document.createElement("textarea");E.style.fontSize="12pt",E.style.border="0",E.style.padding="0",E.style.margin="0",E.style.position="absolute",E.style[T?"right":"left"]="-9999px";var H=window.pageYOffset||document.documentElement.scrollTop;return E.style.top="".concat(H,"px"),E.setAttribute("readonly",""),E.value=j,E}var Y=function(T,E){var H=v(T);E.container.appendChild(H);var I=p()(H);return m("copy"),H.remove(),I},B=function(T){var E=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},H="";return typeof T=="string"?H=Y(T,E):T instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(T==null?void 0:T.type)?H=Y(T.value,E):(H=p()(T),m("copy")),H},N=B;function O(j){return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?O=function(E){return typeof E}:O=function(E){return E&&typeof Symbol=="function"&&E.constructor===Symbol&&E!==Symbol.prototype?"symbol":typeof E},O(j)}var Qe=function(){var T=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},E=T.action,H=E===void 0?"copy":E,I=T.container,q=T.target,Me=T.text;if(H!=="copy"&&H!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(q!==void 0)if(q&&O(q)==="object"&&q.nodeType===1){if(H==="copy"&&q.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. Please use "readonly" instead of "disabled" attribute');if(H==="cut"&&(q.hasAttribute("readonly")||q.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. 
You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if(Me)return N(Me,{container:I});if(q)return H==="cut"?h(q):N(q,{container:I})},De=Qe;function $e(j){return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?$e=function(E){return typeof E}:$e=function(E){return E&&typeof Symbol=="function"&&E.constructor===Symbol&&E!==Symbol.prototype?"symbol":typeof E},$e(j)}function Ei(j,T){if(!(j instanceof T))throw new TypeError("Cannot call a class as a function")}function tn(j,T){for(var E=0;E0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof I.action=="function"?I.action:this.defaultAction,this.target=typeof I.target=="function"?I.target:this.defaultTarget,this.text=typeof I.text=="function"?I.text:this.defaultText,this.container=$e(I.container)==="object"?I.container:document.body}},{key:"listenClick",value:function(I){var q=this;this.listener=c()(I,"click",function(Me){return q.onClick(Me)})}},{key:"onClick",value:function(I){var q=I.delegateTarget||I.currentTarget,Me=this.action(q)||"copy",kt=De({action:Me,container:this.container,target:this.target(q),text:this.text(q)});this.emit(kt?"success":"error",{action:Me,text:kt,trigger:q,clearSelection:function(){q&&q.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(I){return vr("action",I)}},{key:"defaultTarget",value:function(I){var q=vr("target",I);if(q)return document.querySelector(q)}},{key:"defaultText",value:function(I){return vr("text",I)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(I){var q=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return N(I,q)}},{key:"cut",value:function(I){return h(I)}},{key:"isSupported",value:function(){var I=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],q=typeof I=="string"?[I]:I,Me=!!document.queryCommandSupported;return q.forEach(function(kt){Me=Me&&!!document.queryCommandSupported(kt)}),Me}}]),E}(a()),Ai=Li},828:function(n){var o=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function s(a,f){for(;a&&a.nodeType!==o;){if(typeof a.matches=="function"&&a.matches(f))return a;a=a.parentNode}}n.exports=s},438:function(n,o,i){var s=i(828);function a(u,p,m,d,h){var v=c.apply(this,arguments);return u.addEventListener(m,v,h),{destroy:function(){u.removeEventListener(m,v,h)}}}function f(u,p,m,d,h){return typeof u.addEventListener=="function"?a.apply(null,arguments):typeof m=="function"?a.bind(null,document).apply(null,arguments):(typeof u=="string"&&(u=document.querySelectorAll(u)),Array.prototype.map.call(u,function(v){return a(v,p,m,d,h)}))}function c(u,p,m,d){return function(h){h.delegateTarget=s(h.target,p),h.delegateTarget&&d.call(u,h)}}n.exports=f},879:function(n,o){o.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},o.nodeList=function(i){var s=Object.prototype.toString.call(i);return i!==void 0&&(s==="[object NodeList]"||s==="[object HTMLCollection]")&&"length"in i&&(i.length===0||o.node(i[0]))},o.string=function(i){return typeof i=="string"||i instanceof String},o.fn=function(i){var s=Object.prototype.toString.call(i);return s==="[object Function]"}},370:function(n,o,i){var s=i(879),a=i(438);function f(m,d,h){if(!m&&!d&&!h)throw new Error("Missing required 
arguments");if(!s.string(d))throw new TypeError("Second argument must be a String");if(!s.fn(h))throw new TypeError("Third argument must be a Function");if(s.node(m))return c(m,d,h);if(s.nodeList(m))return u(m,d,h);if(s.string(m))return p(m,d,h);throw new TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function c(m,d,h){return m.addEventListener(d,h),{destroy:function(){m.removeEventListener(d,h)}}}function u(m,d,h){return Array.prototype.forEach.call(m,function(v){v.addEventListener(d,h)}),{destroy:function(){Array.prototype.forEach.call(m,function(v){v.removeEventListener(d,h)})}}}function p(m,d,h){return a(document.body,m,d,h)}n.exports=f},817:function(n){function o(i){var s;if(i.nodeName==="SELECT")i.focus(),s=i.value;else if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var a=i.hasAttribute("readonly");a||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),a||i.removeAttribute("readonly"),s=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var f=window.getSelection(),c=document.createRange();c.selectNodeContents(i),f.removeAllRanges(),f.addRange(c),s=f.toString()}return s}n.exports=o},279:function(n){function o(){}o.prototype={on:function(i,s,a){var f=this.e||(this.e={});return(f[i]||(f[i]=[])).push({fn:s,ctx:a}),this},once:function(i,s,a){var f=this;function c(){f.off(i,c),s.apply(a,arguments)}return c._=s,this.on(i,c,a)},emit:function(i){var s=[].slice.call(arguments,1),a=((this.e||(this.e={}))[i]||[]).slice(),f=0,c=a.length;for(f;f{"use strict";/*! - * escape-html - * Copyright(c) 2012-2013 TJ Holowaychuk - * Copyright(c) 2015 Andreas Lubbe - * Copyright(c) 2015 Tiancheng "Timothy" Gu - * MIT Licensed - */var rs=/["'&<>]/;Yo.exports=ns;function ns(e){var t=""+e,r=rs.exec(t);if(!r)return t;var n,o="",i=0,s=0;for(i=r.index;i0&&i[i.length-1])&&(c[0]===6||c[0]===2)){r=0;continue}if(c[0]===3&&(!i||c[1]>i[0]&&c[1]=e.length&&(e=void 0),{value:e&&e[n++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function W(e,t){var r=typeof Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var n=r.call(e),o,i=[],s;try{for(;(t===void 0||t-- >0)&&!(o=n.next()).done;)i.push(o.value)}catch(a){s={error:a}}finally{try{o&&!o.done&&(r=n.return)&&r.call(n)}finally{if(s)throw s.error}}return i}function D(e,t,r){if(r||arguments.length===2)for(var n=0,o=t.length,i;n1||a(m,d)})})}function a(m,d){try{f(n[m](d))}catch(h){p(i[0][3],h)}}function f(m){m.value instanceof et?Promise.resolve(m.value.v).then(c,u):p(i[0][2],m)}function c(m){a("next",m)}function u(m){a("throw",m)}function p(m,d){m(d),i.shift(),i.length&&a(i[0][0],i[0][1])}}function pn(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof Ee=="function"?Ee(e):e[Symbol.iterator](),r={},n("next"),n("throw"),n("return"),r[Symbol.asyncIterator]=function(){return this},r);function n(i){r[i]=e[i]&&function(s){return new Promise(function(a,f){s=e[i](s),o(a,f,s.done,s.value)})}}function o(i,s,a,f){Promise.resolve(f).then(function(c){i({value:c,done:a})},s)}}function C(e){return typeof e=="function"}function at(e){var t=function(n){Error.call(n),n.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var It=at(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: -`+r.map(function(n,o){return o+1+") "+n.toString()}).join(` - 
`):"",this.name="UnsubscriptionError",this.errors=r}});function Ve(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var Ie=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var t,r,n,o,i;if(!this.closed){this.closed=!0;var s=this._parentage;if(s)if(this._parentage=null,Array.isArray(s))try{for(var a=Ee(s),f=a.next();!f.done;f=a.next()){var c=f.value;c.remove(this)}}catch(v){t={error:v}}finally{try{f&&!f.done&&(r=a.return)&&r.call(a)}finally{if(t)throw t.error}}else s.remove(this);var u=this.initialTeardown;if(C(u))try{u()}catch(v){i=v instanceof It?v.errors:[v]}var p=this._finalizers;if(p){this._finalizers=null;try{for(var m=Ee(p),d=m.next();!d.done;d=m.next()){var h=d.value;try{ln(h)}catch(v){i=i!=null?i:[],v instanceof It?i=D(D([],W(i)),W(v.errors)):i.push(v)}}}catch(v){n={error:v}}finally{try{d&&!d.done&&(o=m.return)&&o.call(m)}finally{if(n)throw n.error}}}if(i)throw new It(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)ln(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&Ve(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&Ve(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Sr=Ie.EMPTY;function jt(e){return e instanceof Ie||e&&"closed"in e&&C(e.remove)&&C(e.add)&&C(e.unsubscribe)}function ln(e){C(e)?e():e.unsubscribe()}var Le={onUnhandledError:null,onStoppedNotification:null,Promise:void 0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var st={setTimeout:function(e,t){for(var r=[],n=2;n0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var n=this,o=this,i=o.hasError,s=o.isStopped,a=o.observers;return i||s?Sr:(this.currentObservers=null,a.push(r),new Ie(function(){n.currentObservers=null,Ve(a,r)}))},t.prototype._checkFinalizedStatuses=function(r){var n=this,o=n.hasError,i=n.thrownError,s=n.isStopped;o?r.error(i):s&&r.complete()},t.prototype.asObservable=function(){var r=new F;return r.source=this,r},t.create=function(r,n){return new xn(r,n)},t}(F);var xn=function(e){ie(t,e);function t(r,n){var o=e.call(this)||this;return o.destination=r,o.source=n,o}return t.prototype.next=function(r){var n,o;(o=(n=this.destination)===null||n===void 0?void 0:n.next)===null||o===void 0||o.call(n,r)},t.prototype.error=function(r){var n,o;(o=(n=this.destination)===null||n===void 0?void 0:n.error)===null||o===void 0||o.call(n,r)},t.prototype.complete=function(){var r,n;(n=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||n===void 0||n.call(r)},t.prototype._subscribe=function(r){var n,o;return(o=(n=this.source)===null||n===void 0?void 0:n.subscribe(r))!==null&&o!==void 0?o:Sr},t}(x);var Et={now:function(){return(Et.delegate||Date).now()},delegate:void 0};var wt=function(e){ie(t,e);function t(r,n,o){r===void 
0&&(r=1/0),n===void 0&&(n=1/0),o===void 0&&(o=Et);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=n,i._timestampProvider=o,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=n===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,n),i}return t.prototype.next=function(r){var n=this,o=n.isStopped,i=n._buffer,s=n._infiniteTimeWindow,a=n._timestampProvider,f=n._windowTime;o||(i.push(r),!s&&i.push(a.now()+f)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var n=this._innerSubscribe(r),o=this,i=o._infiniteTimeWindow,s=o._buffer,a=s.slice(),f=0;f0?e.prototype.requestAsyncId.call(this,r,n,o):(r.actions.push(this),r._scheduled||(r._scheduled=ut.requestAnimationFrame(function(){return r.flush(void 0)})))},t.prototype.recycleAsyncId=function(r,n,o){var i;if(o===void 0&&(o=0),o!=null?o>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,n,o);var s=r.actions;n!=null&&((i=s[s.length-1])===null||i===void 0?void 0:i.id)!==n&&(ut.cancelAnimationFrame(n),r._scheduled=void 0)},t}(Wt);var Sn=function(e){ie(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var n=this._scheduled;this._scheduled=void 0;var o=this.actions,i;r=r||o.shift();do if(i=r.execute(r.state,r.delay))break;while((r=o[0])&&r.id===n&&o.shift());if(this._active=!1,i){for(;(r=o[0])&&r.id===n&&o.shift();)r.unsubscribe();throw i}},t}(Dt);var Oe=new Sn(wn);var _=new F(function(e){return e.complete()});function Vt(e){return e&&C(e.schedule)}function Cr(e){return e[e.length-1]}function Ye(e){return C(Cr(e))?e.pop():void 0}function Te(e){return Vt(Cr(e))?e.pop():void 0}function zt(e,t){return typeof Cr(e)=="number"?e.pop():t}var pt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function Nt(e){return C(e==null?void 0:e.then)}function qt(e){return C(e[ft])}function Kt(e){return Symbol.asyncIterator&&C(e==null?void 0:e[Symbol.asyncIterator])}function Qt(e){return new TypeError("You provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. 
You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function zi(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var Yt=zi();function Gt(e){return C(e==null?void 0:e[Yt])}function Bt(e){return un(this,arguments,function(){var r,n,o,i;return $t(this,function(s){switch(s.label){case 0:r=e.getReader(),s.label=1;case 1:s.trys.push([1,,9,10]),s.label=2;case 2:return[4,et(r.read())];case 3:return n=s.sent(),o=n.value,i=n.done,i?[4,et(void 0)]:[3,5];case 4:return[2,s.sent()];case 5:return[4,et(o)];case 6:return[4,s.sent()];case 7:return s.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function Jt(e){return C(e==null?void 0:e.getReader)}function U(e){if(e instanceof F)return e;if(e!=null){if(qt(e))return Ni(e);if(pt(e))return qi(e);if(Nt(e))return Ki(e);if(Kt(e))return On(e);if(Gt(e))return Qi(e);if(Jt(e))return Yi(e)}throw Qt(e)}function Ni(e){return new F(function(t){var r=e[ft]();if(C(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function qi(e){return new F(function(t){for(var r=0;r=2;return function(n){return n.pipe(e?A(function(o,i){return e(o,i,n)}):de,ge(1),r?He(t):Dn(function(){return new Zt}))}}function Vn(){for(var e=[],t=0;t=2,!0))}function pe(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new x}:t,n=e.resetOnError,o=n===void 0?!0:n,i=e.resetOnComplete,s=i===void 0?!0:i,a=e.resetOnRefCountZero,f=a===void 0?!0:a;return function(c){var u,p,m,d=0,h=!1,v=!1,Y=function(){p==null||p.unsubscribe(),p=void 0},B=function(){Y(),u=m=void 0,h=v=!1},N=function(){var O=u;B(),O==null||O.unsubscribe()};return y(function(O,Qe){d++,!v&&!h&&Y();var De=m=m!=null?m:r();Qe.add(function(){d--,d===0&&!v&&!h&&(p=$r(N,f))}),De.subscribe(Qe),!u&&d>0&&(u=new rt({next:function($e){return De.next($e)},error:function($e){v=!0,Y(),p=$r(B,o,$e),De.error($e)},complete:function(){h=!0,Y(),p=$r(B,s),De.complete()}}),U(O).subscribe(u))})(c)}}function $r(e,t){for(var r=[],n=2;ne.next(document)),e}function K(e,t=document){return Array.from(t.querySelectorAll(e))}function z(e,t=document){let r=ce(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function ce(e,t=document){return t.querySelector(e)||void 0}function _e(){return document.activeElement instanceof HTMLElement&&document.activeElement||void 0}function tr(e){return L(b(document.body,"focusin"),b(document.body,"focusout")).pipe(ke(1),l(()=>{let t=_e();return typeof t!="undefined"?e.contains(t):!1}),V(e===_e()),J())}function Xe(e){return{x:e.offsetLeft,y:e.offsetTop}}function Kn(e){return L(b(window,"load"),b(window,"resize")).pipe(Ce(0,Oe),l(()=>Xe(e)),V(Xe(e)))}function rr(e){return{x:e.scrollLeft,y:e.scrollTop}}function dt(e){return L(b(e,"scroll"),b(window,"resize")).pipe(Ce(0,Oe),l(()=>rr(e)),V(rr(e)))}var Yn=function(){if(typeof Map!="undefined")return Map;function e(t,r){var n=-1;return t.some(function(o,i){return o[0]===r?(n=i,!0):!1}),n}return function(){function t(){this.__entries__=[]}return Object.defineProperty(t.prototype,"size",{get:function(){return this.__entries__.length},enumerable:!0,configurable:!0}),t.prototype.get=function(r){var n=e(this.__entries__,r),o=this.__entries__[n];return o&&o[1]},t.prototype.set=function(r,n){var o=e(this.__entries__,r);~o?this.__entries__[o][1]=n:this.__entries__.push([r,n])},t.prototype.delete=function(r){var 
n=this.__entries__,o=e(n,r);~o&&n.splice(o,1)},t.prototype.has=function(r){return!!~e(this.__entries__,r)},t.prototype.clear=function(){this.__entries__.splice(0)},t.prototype.forEach=function(r,n){n===void 0&&(n=null);for(var o=0,i=this.__entries__;o0},e.prototype.connect_=function(){!Wr||this.connected_||(document.addEventListener("transitionend",this.onTransitionEnd_),window.addEventListener("resize",this.refresh),va?(this.mutationsObserver_=new MutationObserver(this.refresh),this.mutationsObserver_.observe(document,{attributes:!0,childList:!0,characterData:!0,subtree:!0})):(document.addEventListener("DOMSubtreeModified",this.refresh),this.mutationEventsAdded_=!0),this.connected_=!0)},e.prototype.disconnect_=function(){!Wr||!this.connected_||(document.removeEventListener("transitionend",this.onTransitionEnd_),window.removeEventListener("resize",this.refresh),this.mutationsObserver_&&this.mutationsObserver_.disconnect(),this.mutationEventsAdded_&&document.removeEventListener("DOMSubtreeModified",this.refresh),this.mutationsObserver_=null,this.mutationEventsAdded_=!1,this.connected_=!1)},e.prototype.onTransitionEnd_=function(t){var r=t.propertyName,n=r===void 0?"":r,o=ba.some(function(i){return!!~n.indexOf(i)});o&&this.refresh()},e.getInstance=function(){return this.instance_||(this.instance_=new e),this.instance_},e.instance_=null,e}(),Gn=function(e,t){for(var r=0,n=Object.keys(t);r0},e}(),Jn=typeof WeakMap!="undefined"?new WeakMap:new Yn,Xn=function(){function e(t){if(!(this instanceof e))throw new TypeError("Cannot call a class as a function.");if(!arguments.length)throw new TypeError("1 argument required, but only 0 present.");var r=ga.getInstance(),n=new La(t,r,this);Jn.set(this,n)}return e}();["observe","unobserve","disconnect"].forEach(function(e){Xn.prototype[e]=function(){var t;return(t=Jn.get(this))[e].apply(t,arguments)}});var Aa=function(){return typeof nr.ResizeObserver!="undefined"?nr.ResizeObserver:Xn}(),Zn=Aa;var eo=new x,Ca=$(()=>k(new Zn(e=>{for(let t of e)eo.next(t)}))).pipe(g(e=>L(ze,k(e)).pipe(R(()=>e.disconnect()))),X(1));function he(e){return{width:e.offsetWidth,height:e.offsetHeight}}function ye(e){return Ca.pipe(S(t=>t.observe(e)),g(t=>eo.pipe(A(({target:r})=>r===e),R(()=>t.unobserve(e)),l(()=>he(e)))),V(he(e)))}function bt(e){return{width:e.scrollWidth,height:e.scrollHeight}}function ar(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}var to=new x,Ra=$(()=>k(new IntersectionObserver(e=>{for(let t of e)to.next(t)},{threshold:0}))).pipe(g(e=>L(ze,k(e)).pipe(R(()=>e.disconnect()))),X(1));function sr(e){return Ra.pipe(S(t=>t.observe(e)),g(t=>to.pipe(A(({target:r})=>r===e),R(()=>t.unobserve(e)),l(({isIntersecting:r})=>r))))}function ro(e,t=16){return dt(e).pipe(l(({y:r})=>{let n=he(e),o=bt(e);return r>=o.height-n.height-t}),J())}var cr={drawer:z("[data-md-toggle=drawer]"),search:z("[data-md-toggle=search]")};function no(e){return cr[e].checked}function Ke(e,t){cr[e].checked!==t&&cr[e].click()}function Ue(e){let t=cr[e];return b(t,"change").pipe(l(()=>t.checked),V(t.checked))}function ka(e,t){switch(e.constructor){case HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function Ha(){return L(b(window,"compositionstart").pipe(l(()=>!0)),b(window,"compositionend").pipe(l(()=>!1))).pipe(V(!1))}function oo(){let 
e=b(window,"keydown").pipe(A(t=>!(t.metaKey||t.ctrlKey)),l(t=>({mode:no("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),A(({mode:t,type:r})=>{if(t==="global"){let n=_e();if(typeof n!="undefined")return!ka(n,r)}return!0}),pe());return Ha().pipe(g(t=>t?_:e))}function le(){return new URL(location.href)}function ot(e){location.href=e.href}function io(){return new x}function ao(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)ao(e,r)}function M(e,t,...r){let n=document.createElement(e);if(t)for(let o of Object.keys(t))typeof t[o]!="undefined"&&(typeof t[o]!="boolean"?n.setAttribute(o,t[o]):n.setAttribute(o,""));for(let o of r)ao(n,o);return n}function fr(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function so(){return location.hash.substring(1)}function Dr(e){let t=M("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function Pa(e){return L(b(window,"hashchange"),e).pipe(l(so),V(so()),A(t=>t.length>0),X(1))}function co(e){return Pa(e).pipe(l(t=>ce(`[id="${t}"]`)),A(t=>typeof t!="undefined"))}function Vr(e){let t=matchMedia(e);return er(r=>t.addListener(()=>r(t.matches))).pipe(V(t.matches))}function fo(){let e=matchMedia("print");return L(b(window,"beforeprint").pipe(l(()=>!0)),b(window,"afterprint").pipe(l(()=>!1))).pipe(V(e.matches))}function zr(e,t){return e.pipe(g(r=>r?t():_))}function ur(e,t={credentials:"same-origin"}){return ue(fetch(`${e}`,t)).pipe(fe(()=>_),g(r=>r.status!==200?Ot(()=>new Error(r.statusText)):k(r)))}function We(e,t){return ur(e,t).pipe(g(r=>r.json()),X(1))}function uo(e,t){let r=new DOMParser;return ur(e,t).pipe(g(n=>n.text()),l(n=>r.parseFromString(n,"text/xml")),X(1))}function pr(e){let t=M("script",{src:e});return $(()=>(document.head.appendChild(t),L(b(t,"load"),b(t,"error").pipe(g(()=>Ot(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(l(()=>{}),R(()=>document.head.removeChild(t)),ge(1))))}function po(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function lo(){return L(b(window,"scroll",{passive:!0}),b(window,"resize",{passive:!0})).pipe(l(po),V(po()))}function mo(){return{width:innerWidth,height:innerHeight}}function ho(){return b(window,"resize",{passive:!0}).pipe(l(mo),V(mo()))}function bo(){return G([lo(),ho()]).pipe(l(([e,t])=>({offset:e,size:t})),X(1))}function lr(e,{viewport$:t,header$:r}){let n=t.pipe(ee("size")),o=G([n,r]).pipe(l(()=>Xe(e)));return G([r,t,o]).pipe(l(([{height:i},{offset:s,size:a},{x:f,y:c}])=>({offset:{x:s.x-f,y:s.y-c+i},size:a})))}(()=>{function e(n,o){parent.postMessage(n,o||"*")}function t(...n){return n.reduce((o,i)=>o.then(()=>new Promise(s=>{let a=document.createElement("script");a.src=i,a.onload=s,document.body.appendChild(a)})),Promise.resolve())}var r=class extends EventTarget{constructor(n){super(),this.url=n,this.m=i=>{i.source===this.w&&(this.dispatchEvent(new MessageEvent("message",{data:i.data})),this.onmessage&&this.onmessage(i))},this.e=(i,s,a,f,c)=>{if(s===`${this.url}`){let u=new ErrorEvent("error",{message:i,filename:s,lineno:a,colno:f,error:c});this.dispatchEvent(u),this.onerror&&this.onerror(u)}};let o=document.createElement("iframe");o.hidden=!0,document.body.appendChild(this.iframe=o),this.w.document.open(),this.w.document.write(` - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - - - -
- - - - - - - -
- -
- - - - -
-
- - - -
-
-
- - - - - - -
-
-
- - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - -

Bundles (Recommended)


Prerequisites

  1. Install Locally (optional)
  2. Python >= 3.8
  3. Configure the databricks cli cfg file: pip install databricks-cli and then databricks configure -t, which will configure the databricks cli with a token.
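After running databricks configure -t, the generated cfg file (~/.databrickscfg by default) should contain a profile along these lines; the host and token values here are placeholders, not real credentials:

    [DEFAULT]
    host = https://<your-workspace>.cloud.databricks.com
    token = <your-personal-access-token>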

Setup Project

  • The first step is to initialize the project. It will do the following:

    1. Create the entrypoint.py file in your workflows module.
    2. Update your .gitignore file with the correct directories to ignore.

  • To initialize the project inside the bfs shell run:

    bf init

  • It will prompt you for the following (each value can also be passed as a flag, as shown in the sketch below):

    1. Project Name
    2. Git https url of your project
    3. Workflows Directory Path
    4. Brickflow Version
    5. Spark Expectations Version
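If you prefer to skip the prompts, the same values can be supplied as flags (see the init command reference below); a sketch, where the project name, repo URL, and versions are all placeholders:

    bf init -n my-project -g https://github.com/org/my-project -wd workflows -bfv 1.0.0 -sev 1.0.0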

gitignore

  • For now, all the bundle.yml files will be code generated, so you can add the following to your .gitignore file:

    **/bundle.yml

Post Setup

  • To deploy, run the following command:

    bf deploy --deploy-mode=bundle -p "<profile>" -wd <workflows directory>

  • To destroy, run the following command:

    bf destroy --deploy-mode=bundle -p "<profile>" -wd <workflows directory>

Commands


This page provides documentation for our command line tools.

bf

CLI for managing Databricks Workflows

Usage:

bf [OPTIONS] COMMAND [ARGS]...

Options:

  --version  Show the version and exit.
  --help     Show this message and exit.

bundle

CLI for proxying to databricks bundles cli.

Usage:

bf bundle [OPTIONS]

Options:

  --help  Show this message and exit.

cdktf

CLI for proxying to cdktf cli.

Usage:

bf cdktf [OPTIONS]

Options:

  --help  Show this message and exit.

deploy

CLI for deploying workflow projects.

Usage:

bf deploy [OPTIONS]

Options:

  --auto-approve                  Auto approve brickflow pipeline without
                                  being prompted to approve.
  --deploy-mode [cdktf|bundle]    Which deployment framework to use to deploy.
                                  [default: cdktf]
  --force-acquire-lock            Force acquire lock for databricks bundles
                                  deploy.
  -p, --profile TEXT              The databricks profile to use for
                                  authenticating to databricks during
                                  deployment.
  --git-provider TEXT             The github provider for brickflow this is
                                  used for configuring github on DBX jobs.
  --git-ref TEXT                  The commit/tag/branch to use in github.
  -r, --repo-url TEXT             The github url in which to run brickflow
                                  with.
  -e, --env TEXT                  Set the environment value, certain tags
                                  [TBD] get added to the workflows based on
                                  this value.
  -w, --workflow TEXT             Provide the workflow file names which you
                                  want to deploy, each file name separated by
                                  space! Example: bf deploy -p DEFAULT -l -w
                                  wf1.py -w wf2.py
  -wd, --workflows-dir DIRECTORY  Provide the workflow directory that has to
                                  be deployed
  -l, --local-mode                Set the environment flag to local and other
                                  components [TBD] are disabled in local mode.
  --help                          Show this message and exit.

destroy

CLI for destroying workflow projects.

Usage:

bf destroy [OPTIONS]

Options:

  --auto-approve                  Auto approve brickflow pipeline without
                                  being prompted to approve.
  --deploy-mode [cdktf|bundle]    Which deployment framework to use to deploy.
                                  [default: cdktf]
  --force-acquire-lock            Force acquire lock for databricks bundles
                                  destroy.
  -p, --profile TEXT              The databricks profile to use for
                                  authenticating to databricks during
                                  deployment.
  --git-provider TEXT             The github provider for brickflow this is
                                  used for configuring github on DBX jobs.
  --git-ref TEXT                  The commit/tag/branch to use in github.
  -r, --repo-url TEXT             The github url in which to run brickflow
                                  with.
  -e, --env TEXT                  Set the environment value, certain tags
                                  [TBD] get added to the workflows based on
                                  this value.
  -w, --workflow TEXT             Provide the workflow file names which you
                                  want to deploy, each file name separated by
                                  space! Example: bf deploy -p DEFAULT -l -w
                                  wf1.py -w wf2.py
  -wd, --workflows-dir DIRECTORY  Provide the workflow directory that has to
                                  be deployed
  -l, --local-mode                Set the environment flag to local and other
                                  components [TBD] are disabled in local mode.
  --help                          Show this message and exit.

diff

CLI for identifying diff in projects (only cdktf supported).

Usage:

bf diff [OPTIONS]

Options:

  -p, --profile TEXT              The databricks profile to use for
                                  authenticating to databricks during
                                  deployment.
  --git-provider TEXT             The github provider for brickflow this is
                                  used for configuring github on DBX jobs.
  --git-ref TEXT                  The commit/tag/branch to use in github.
  -r, --repo-url TEXT             The github url in which to run brickflow
                                  with.
  -e, --env TEXT                  Set the environment value, certain tags
                                  [TBD] get added to the workflows based on
                                  this value.
  -w, --workflow TEXT             Provide the workflow file names which you
                                  want to deploy, each file name separated by
                                  space! Example: bf deploy -p DEFAULT -l -w
                                  wf1.py -w wf2.py
  -wd, --workflows-dir DIRECTORY  Provide the workflow directory that has to
                                  be deployed
  -l, --local-mode                Set the environment flag to local and other
                                  components [TBD] are disabled in local mode.
  --help                          Show this message and exit.

docs

Use to open docs in your browser...

Usage:

bf docs [OPTIONS]

Options:

  --help  Show this message and exit.

init

Initialize your project with Brickflow...

Usage:

bf init [OPTIONS]

Options:

  -n, --project-name TEXT
  -g, --git-https-url TEXT        Provide the github URL for your project,
                                  example: https://github.com/nike-eda-
                                  apla/brickflow
  -wd, --workflows-dir DIRECTORY
  -bfv, --brickflow-version TEXT
  -sev, --spark-expectations-version TEXT
  --help                          Show this message and exit.

projects

Manage one to many brickflow projects

Usage:

bf projects [OPTIONS] COMMAND [ARGS]...

Options:

  --help  Show this message and exit.

add

Adds a project to the brickflow-multi-project.yml file and an entrypoint.py file in the workflows dir

Usage:

bf projects add [OPTIONS]

Options:

  --name TEXT                     Name of the project
  --path-from-repo-root-to-project-root DIRECTORY
                                  Path from repo root to project root
  --path-project-root-to-workflows-dir TEXT
                                  Path from project root to workflows dir
  --deployment-mode [bundle]      Deployment mode
  -g, --git-https-url TEXT        Provide the github URL for your project,
                                  example: https://github.com/nike-eda-
                                  apla/brickflow
  -bfv, --brickflow-version TEXT
  -sev, --spark-expectations-version TEXT
  --skip-entrypoint               Skip creating entrypoint.py file
  --help                          Show this message and exit.
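For example, a project in a monorepo might be registered with something like the following; every name and path here is a placeholder:

    bf projects add \
        --name my-project \
        --path-from-repo-root-to-project-root projects/my-project \
        --path-project-root-to-workflows-dir workflows \
        --deployment-mode bundle \
        -g https://github.com/org/my-repo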

deploy

Deploy projects in the brickflow-multi-project.yml file

Usage:

bf projects deploy [OPTIONS]

Options:

  --force-acquire-lock  Force acquire lock for databricks bundles destroy.
  --auto-approve        Auto approve brickflow pipeline without being prompted
                        to approve.
  -p, --profile TEXT    The databricks profile to use for authenticating to
                        databricks during deployment.
  --project []          Select the project of workflows you would like to
                        deploy.
  -e, --env TEXT        Set the environment value, certain tags [TBD] get
                        added to the workflows based on this value.
  --help                Show this message and exit.

destroy

Destroy projects in the brickflow-multi-project.yml file

Usage:

bf projects destroy [OPTIONS]

Options:

  --force-acquire-lock  Force acquire lock for databricks bundles destroy.
  --auto-approve        Auto approve brickflow pipeline without being prompted
                        to approve.
  -p, --profile TEXT    The databricks profile to use for authenticating to
                        databricks during deployment.
  --project []          Select the project of workflows you would like to
                        deploy.
  -e, --env TEXT        Set the environment value, certain tags [TBD] get
                        added to the workflows based on this value.
  --help                Show this message and exit.

list

Lists all projects in the brickflow-multi-project.yml file

Usage:

bf projects list [OPTIONS]

Options:

  --help  Show this message and exit.

remove

Removes a project from the brickflow-multi-project.yml file

Usage:

bf projects remove [OPTIONS]

Options:

  --name []  Name of the project
  --help     Show this message and exit.

synth

Synth the bundle.yml for project

Usage:

bf projects synth [OPTIONS]

Options:

  -p, --profile TEXT  The databricks profile to use for authenticating to
                      databricks during deployment.
  --project []        Select the project of workflows you would like to
                      deploy.
  -e, --env TEXT      Set the environment value, certain tags [TBD] get added
                      to the workflows based on this value.
  --help              Show this message and exit.

sync

Synchronize your bundle tree to databricks workspace (only supported by bundle deployment mode).

Usage:

bf sync [OPTIONS]

Options:

  --deploy-mode [bundle]          Which deployment framework to use to deploy.
                                  [default: bundle]
  --watch                         Enable filewatcher to sync files over.
  --full                          Run a full sync.
  --interval-duration TEXT        File system polling interval (for --watch).
  --debug TEXT                    File system polling interval (for --watch).
  -p, --profile TEXT              The databricks profile to use for
                                  authenticating to databricks during
                                  deployment.
  --git-provider TEXT             The github provider for brickflow this is
                                  used for configuring github on DBX jobs.
  --git-ref TEXT                  The commit/tag/branch to use in github.
  -r, --repo-url TEXT             The github url in which to run brickflow
                                  with.
  -e, --env TEXT                  Set the environment value, certain tags
                                  [TBD] get added to the workflows based on
                                  this value.
  -w, --workflow TEXT             Provide the workflow file names which you
                                  want to deploy, each file name separated by
                                  space! Example: bf deploy -p DEFAULT -l -w
                                  wf1.py -w wf2.py
  -wd, --workflows-dir DIRECTORY  Provide the workflow directory that has to
                                  be deployed
  -l, --local-mode                Set the environment flag to local and other
                                  components [TBD] are disabled in local mode.
  --help                          Show this message and exit.
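For example, to keep syncing local changes to the workspace while developing, assuming a profile named DEFAULT and workflows under a workflows directory:

    bf sync --deploy-mode=bundle --watch -p DEFAULT -wd workflows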

Environment Variables

Note: CDKTF is deprecated; please keep that in mind as you read the list below.

Environment Variable | Default Value | Deployment Mode Support | Description
--- | --- | --- | ---
BRICKFLOW_ENV | local | bundle & cdktf (deprecated) | The environment name for Brickflow
BRICKFLOW_FORCE_DEPLOY | False | cdktf (deprecated) | Flag indicating whether to force deployment
BRICKFLOW_DEPLOYMENT_MODE | cdktf (deprecated) | bundle & cdktf (deprecated) | The deployment mode for Brickflow (cdktf, bundles)
BRICKFLOW_GIT_REPO | N/A | bundle & cdktf (deprecated) | The URL of the Git repository for Brickflow
BRICKFLOW_GIT_REF | N/A | bundle & cdktf (deprecated) | The Git reference (branch, tag, commit) for Brickflow
BRICKFLOW_GIT_PROVIDER | github | bundle & cdktf (deprecated) | The Git provider (e.g., GitHub, GitLab) for Brickflow
DATABRICKS_CONFIG_PROFILE | default | bundle & cdktf (deprecated) | The profile name for Databricks configuration
BRICKFLOW_DEPLOY_ONLY_WORKFLOWS | N/A | bundle & cdktf (deprecated) | List of workflows to deploy exclusively
BRICKFLOW_WORKFLOW_PREFIX | N/A | bundle & cdktf (deprecated) | Prefix to add to workflow names during deployment
BRICKFLOW_WORKFLOW_SUFFIX | N/A | bundle & cdktf (deprecated) | Suffix to add to workflow names during deployment
BRICKFLOW_S3_BACKEND_BUCKET | N/A | cdktf (deprecated) | The name of the S3 bucket for the Brickflow backend
BRICKFLOW_S3_BACKEND_KEY | N/A | cdktf (deprecated) | The key or path in the S3 bucket for the Brickflow backend
BRICKFLOW_S3_BACKEND_REGION | N/A | cdktf (deprecated) | The AWS region for the S3 backend
BRICKFLOW_S3_BACKEND_DYNAMODB_TABLE | N/A | cdktf (deprecated) | The DynamoDB table name for tracking the S3 backend
BRICKFLOW_INTERACTIVE_MODE | True | bundle & cdktf (deprecated) | Flag indicating whether to enable interactive mode
BRICKFLOW_BUNDLE_BASE_PATH | /Users/${workspace.current_user.userName} | bundle | The base path for the bundle in the S3 backend
BRICKFLOW_BUNDLE_OBJ_NAME | .brickflow_bundles | bundle | The name of the folder appended to your base path
BRICKFLOW_BUNDLE_CLI_EXEC | databricks | bundle | The executable command for bundle execution. By default it will be downloaded on the fly.
BRICKFLOW_BUNDLE_NO_DOWNLOAD | False | bundle | Flag indicating whether to skip downloading the databricks bundle cli. Useful if you are on a locked-down network.
BRICKFLOW_BUNDLE_CLI_VERSION | 0.200.0 | bundle | The version of the bundle CLI tool
BRICKFLOW_MONOREPO_PATH_TO_BUNDLE_ROOT | N/A | bundle & cdktf (deprecated) | The path to the bundle root directory in a monorepo. The default assumes you are not using a monorepo.

Workflow prefixing or suffixing

This allows for adding prefixes or suffixes to the name of the workflow:

  • BRICKFLOW_WORKFLOW_PREFIX
  • BRICKFLOW_WORKFLOW_SUFFIX

Setting the above is semantically the same as doing this in code:

from brickflow import Workflow

wf = Workflow(
    "thanks",
    prefix="so_long_",  # same as BRICKFLOW_WORKFLOW_PREFIX
    suffix="_and_thanks_for_all_the_fish"  # same as BRICKFLOW_WORKFLOW_SUFFIX
)

wf.name would then result in "so_long_thanks_and_thanks_for_all_the_fish"

This is to allow 'unique' names when deploying the same workflow to the same environment while still keeping the deployments separate.

For example, consider this scenario:

  • You have a workflow named inventory_upsert;
  • Two features are being developed in parallel in the DEV environment; let's name these feature_1 and feature_2;
  • If you can't set a unique name for a workflow, the workflow you create in dev (no matter which feature/branch it originates from) will always be named dev_inventory_upsert;
  • Using the prefix/suffix mechanism, we can set an ENV variable and end up with unique names for each feature, i.e. dev_inventory_upsert_feature_1 and dev_inventory_upsert_feature_2.

Ideal usage for this is in CI/CD pipelines.


Airflow operator rfc


Airflow Operator - Brickflow Support RFC

Airflow Operator | Databricks Native Equivalent | Will Implement | Link to Issues | Link to Impl | Link to Docs
--- | --- | --- | --- | --- | ---
Snowflake Operator |  | ✅ | ❎ |  |
Branch Python Operator |  | ✅ | ❎ |  |
Slack Operator |  | ✅ | ❎ |  |
Email Operator |  | ✅ | ❎ |  |
Task Dependency Sensor |  | ✅ | ❎ |  |
Canary Operator |  | ✅ | ❎ |  |
Bash Operator |  | ✅ | ❎ |  |
Short Circuit Operator |  | ✅ | ❎ |  |
S3 Sensor |  | ✅ | ❎ |  |
Compute Bash Operator | Look at Bash Operator | ❎ | ❎ | ❎ | ❎
Compute Python Operator | Use a task | ❎ | ❎ | ❎ | ❎
EMR Operator | Use a task | ❎ | ❎ | ❎ | ❎
Spark Operator | Use a task | ❎ | ❎ | ❎ | ❎
Python Operator | Use a task | ❎ | ❎ | ❎ | ❎
Dummy Operator | Use a task | ❎ | ❎ | ❎ | ❎
Genie Snowflake Operator | Look at snowflake operator | ❎ | ❎ | ❎ | ❎
Genie Hive Operator | N/A | ❎ | ❎ | ❎ | ❎
Genie S3 Dist CP Operator | N/A | ❎ | ❎ | ❎ | ❎
Athena Operator | Use DBSQL | ❎ | ❎ | ❎ | ❎
Nike EMR Operator | Use a task | ❎ | ❎ | ❎ | ❎
Nike Spark Submit Operator | Use a task | ❎ | ❎ | ❎ | ❎
Compute S3 Prefix Sensor | Look at S3 sensor | ❎ | ❎ | ❎ | ❎

Operators

Snowflake operator

Branch python operator

Slack operator

Email operator

Task dependency sensor

Bash operator

Short circuit operator

S3 Prefix Sensor

Operators which will not be supported

Compute bash operator

Alternative:

Compute python operator

Alternative:

Emr operator

Alternative:

Spark operator

Alternative:

Python operator

Alternative:

Dummy operator

Alternative:

Canary operator

Alternative:

Genie snowflake operator

Alternative:

Genie hive operator

Alternative:

Genie s3 dist cp operator

Alternative:

Athena operator

Alternative:

Nike emr operator

Alternative:

Nike spark submit operator

Alternative:

Compute s3 prefix sensor

Alternative:


Faq

This page is under maintenance.


HighLevel


Brickflow Overview

The objective of Brickflow is to provide a thin layer on top of databricks workflows to help deploy and manage workflows in Databricks. It also provides plugins/extras to be able to run airflow operators directly in the workflows.


Brickflow to Airflow Term Mapping

Object | Airflow | Brickflow
--- | --- | ---
Collection of Workflows | Airflow Cluster (Airflow Dag Bag) | Project/Entrypoint
Workflow | Airflow Dag | Workflow
Task | Airflow Operator | Task
Schedule | Unix Cron | Quartz Cron
Inter Task Communication | XComs | Task Values
Connections to External Services | Airflow Connections | Cerberus Connection Builder
Variables to Tasks | Variables | Task Parameters
Context values (execution_date, etc.) | Airflow Macros, context["ti"] | ctx.<task parameter>
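To make the mapping concrete, here is a minimal Python sketch of a workflow with a single task, following the patterns from the brickflow examples; the workflow name, cluster id, and schedule are placeholders, and the exact API should be checked against the workflows/tasks documentation:

from brickflow import Cluster, Workflow

# a workflow plays the role of an airflow dag; note the Quartz (not Unix) cron
wf = Workflow(
    "demo_workflow",
    default_cluster=Cluster.from_existing_cluster("<existing-cluster-id>"),  # placeholder cluster id
    schedule_quartz_expression="0 0/20 0 ? * * *",  # placeholder Quartz cron
)


@wf.task
def hello_task() -> str:
    # a task plays the role of an airflow operator
    return "hello world"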
-
- - -
- - - -
- -
- - -
- -
-
-
-
- - - - - - - - - \ No newline at end of file diff --git a/dev/img/logo.png b/dev/img/logo.png deleted file mode 100644 index b618b2aa..00000000 Binary files a/dev/img/logo.png and /dev/null differ diff --git a/dev/img/logo.svg b/dev/img/logo.svg deleted file mode 100644 index b61bde05..00000000 --- a/dev/img/logo.svg +++ /dev/null @@ -1,69 +0,0 @@ - - - - - - - - diff --git a/dev/img/maintainance.png b/dev/img/maintainance.png deleted file mode 100644 index acc528a4..00000000 Binary files a/dev/img/maintainance.png and /dev/null differ diff --git a/dev/index.html b/dev/index.html deleted file mode 100644 index 7efd4a60..00000000 --- a/dev/index.html +++ /dev/null @@ -1,1089 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - BrickFlow - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- -
- - - - - - - - -
- - - - - - - -
- -
- - - - -
-
- - - - - - - - - - - -
-
-
- - - -
-
-
- - - -
-
- - - - - - - -

BrickFlow


BrickFlow is a CLI tool for development and deployment of Python based Databricks Workflows in a declarative way.


Concept

brickflow aims to improve the development experience for building any pipelines on databricks via the following (see the sketch after this list):

  • Providing a declarative way to describe workflows via decorators
  • Providing intelligent defaults for compute targets
  • Providing a code-first and git-first approach to managing and deploying workflows
  • Using IaC such as Terraform to manage the state and deploy jobs and their infrastructure
  • A CLI tool that helps facilitate setting up projects
  • Additional functionality through the context library to do additional things for workflows
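Here is the sketch referenced above: a minimal, illustrative example of the declarative decorator style, following the tasks documentation later in this document (the workflow name, cluster id, and task bodies are hypothetical):

declarative_sketch.py
from brickflow import Workflow, Cluster

wf = Workflow(
    "demo_wf",  # hypothetical workflow name
    default_cluster=Cluster.from_existing_cluster("your_existing_cluster_id"),
)

@wf.task  # the task name defaults to the function name
def start():
    pass

@wf.task(depends_on=start)  # dependencies are declared on the decorator
def end():
    pass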

Feedback


Issues with brickflow? Found a bug? Have a great idea for an addition? Want to improve the documentation? Please feel free to file an issue.

Contributing

To contribute, please fork the repository and create a pull request.

diff --git a/dev/limitations/index.html b/dev/limitations/index.html
deleted file mode 100644
index d7b8bbd8..00000000
--- a/dev/limitations/index.html
+++ /dev/null

Limitations

  • Docs (WIP)
  • Python wheel tasks
  • Support for DBSQL tasks using SQL Warehouses
  • CLI for visualizing workflow locally using a graphing tool
diff --git a/dev/projects/index.html b/dev/projects/index.html
deleted file mode 100644
index bb067986..00000000
--- a/dev/projects/index.html
+++ /dev/null

Projects

The project is similar to a map cluster: it can be composed of various different Workflows or dags.

Here is an example of an entrypoint. Click the plus buttons to understand all the parts of the entrypoint file.

entrypoint.py
# Databricks notebook source  # (1)!

import examples.brickflow_examples.workflows

from brickflow import Project, PypiTaskLibrary, MavenTaskLibrary

ARTIFACTORY = ""


def main() -> None:
    """Project entrypoint"""
    with Project(
        "brickflow-demo",  # (3)!
        git_repo="https://github.com/Nike-Inc/brickflow",  # (4)!
        provider="github",  # (5)!
        libraries=[  # (6)!
            PypiTaskLibrary(package="brickflow==1.0.0 --extra-index-url " + ARTIFACTORY),
            MavenTaskLibrary(coordinates="com.cronutils:cron-utils:9.2.0"),
        ],
    ) as f:
        f.add_pkg(examples.brickflow_examples.workflows)  # (7)!


if __name__ == "__main__":  # (2)!
    main()
  1. Uploading this Python file into databricks with this comment on the first line treats the python file as a notebook.
  2. This makes sure this only runs when this file is run via python entrypoint.py
  3. This is the project name you provided when you do bf init
  4. This is the git repo that is introspected when running bf init
  5. This is the github provider that you decide on.
  6. You can provide a list of packages that need to be installed in all of your clusters when running ETL.
  7. You can add multiple packages in your project where you are defining workflows (a sketch of such a module follows this list).
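As a sketch of what annotation 7 refers to, a module inside the examples.brickflow_examples.workflows package that f.add_pkg picks up might look like the following. The module path, workflow name, and cluster id are hypothetical, and this assumes add_pkg discovers module-level Workflow objects:

workflows/sample_workflow.py
from brickflow import Workflow, Cluster

wf = Workflow(
    "sample_wf",  # hypothetical workflow name
    default_cluster=Cluster.from_existing_cluster("your_existing_cluster_id"),
)

@wf.task
def start():
    pass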
diff --git a/dev/quickstart/index.html b/dev/quickstart/index.html
deleted file mode 100644
index d33895c3..00000000
--- a/dev/quickstart/index.html
+++ /dev/null

CDKTF (Deprecated)


Prerequisites

  1. You need either of the following installed:
  2. Install via docker:
     1. Docker installed on your laptop
  3. Install Locally (optional):
     1. Python >= 3.8
     2. Install nodejs == 18.14.0
     3. Install terraform 1.3.1
     4. Install cerberus-python-client
  4. Configure your github integration to your repos using SSH.
  5. Configure the databricks cli cfg file: pip install databricks-cli and then databricks configure -t, which will configure the databricks cli with a token (a sample of the resulting config file is sketched after this list).

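For reference, a minimal ~/.databrickscfg produced by databricks configure -t looks roughly like the following sketch (the host and token values are placeholders, not real values):

~/.databrickscfg
[DEFAULT]
host = https://<your-workspace-url>
token = <your-personal-access-token>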
Install Via Docker


We recommend using a docker container for development, as it's easier to pick up version upgrades by changing the docker image version.

  • Add the following alias to your profile or zsh_profile:

    alias bfs='docker run -it --rm --name brickflow -v "$PWD":/usr/src/brickflow -v ~/.databrickscfg:/root/.databrickscfg:ro -v ~/.ssh:/root/.ssh:ro -w /usr/src/brickflow <DOCKERHUB_URL_REPLACE>/brickflow:latest'

  • Please change your directory to the root of your project. Then run the bfs command:

    bfs

  • This will launch the bash shell inside the container. It will do the following:

    1. Mount your current working directory as read-write to the working directory in the container.
    2. Mount your ~/.ssh directory as read-only to ~/.ssh in the container.
    3. Mount your ~/.databrickscfg file as read-only to ~/.databrickscfg in the container.

  • You will also need to install any required packages of your respective project inside the docker container.

Upgrade the brickflow container

  • If the brickflow version in your container is outdated and needs to be upgraded, run the command below in your shell to pull the latest docker image:

    docker pull <DOCKERHUB_URL_REPLACE>/brickflow:latest

Install locally (optional if you choose not to use docker)


Alternatively, instead of docker you can install locally, but you will need to resolve all the dependencies yourself.


The project relies on terraform and cdktf to deploy your python projects.

  1. Install brew if not installed already using brew-install
  2. Install node using brew install node
  3. Install cdktf-cli via npm install -g cdktf-cli
  4. Install the brickflow package via pip install brickflow[deploy]
  5. Install the cerberus extra if needed via pip install brickflow[cerberus]
  6. Install the airflow extra if needed via pip install brickflow[airflow]

Setup Project

  • The first step is to initialize the project. It will do the following:

    1. Create the entrypoint.py file in your workflows module.
    2. Update your .gitignore file with the correct directories to ignore.

  • To initialize the project, run the following inside the bfs shell:

    bf init

  • It will prompt you for the:

    1. Project Name
    2. Git https url of your project
    3. Workflows Directory Path
    4. Brickflow Version
    5. Spark Expectations Version
diff --git a/dev/search/search_index.json b/dev/search/search_index.json
deleted file mode 100644
index 6cb95fce..00000000
--- a/dev/search/search_index.json
+++ /dev/null
@@ -1 +0,0 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"BrickFlow","text":"

BrickFlow is a CLI tool for development and deployment of Python based Databricks Workflows in a declarative way.

"},{"location":"#concept","title":"Concept","text":"

brickflow aims to improve development experience for building any pipelines on databricks via:

  • Providing a declarative way to describe workflows via decorators
  • Provide intelligent defaults to compute targets
  • Provide a code and git first approach to managing and deploying workflows
  • Use IAC such as terraform to manage the state and deploy jobs and their infrastructure.
  • CLI tool helps facilitate setting up projects
  • Provides additional functionality through the context library to be able to do additional things for workflows.
"},{"location":"#feedback","title":"Feedback","text":"

Issues with brickflow? Found a bug? Have a great idea for an addition? Want to improve the documentation? Please feel free to file an issue.

"},{"location":"#contributing","title":"Contributing","text":"

To contribute please fork and create a pull request.

"},{"location":"bundles-quickstart/","title":"Bundles (Recommended)","text":""},{"location":"bundles-quickstart/#prerequisites","title":"Prerequisites","text":"
  1. Install Locally (optional):
  2. Python >= 3.8
  3. Configure the databricks cli cfg file. pip install databricks-cli and then databricks configure -t which will configure the databricks cli with a token.
"},{"location":"bundles-quickstart/#setup-project","title":"Setup Project","text":"
  • The first step is to initialize the project. It will do the following:

    1. Create the entrypoint.py file in your workflows module.
    2. Update your .gitignore file with the correct directories to ignore.
  • To initialize the project inside the bfs shell run:

    bf init\n
  • It will prompt you for the:

    1. Project Name
    2. Git https url of your project
    3. Workflows Directory Path
    4. Brickflow Version
    5. Spark Expectations Version
"},{"location":"bundles-quickstart/#gitignore","title":"gitignore","text":"
  • For now all the bundle.yml files will be code generated so you can add the following to your .gitignore file:

    **/bundle.yml\n
"},{"location":"bundles-quickstart/#post-setup","title":"Post Setup","text":"
  • To deploy run the following command

    bf deploy --deploy-mode=bundle -p \"<profile>\" -wd <workflows directory>\n
  • To destroy run the following command

    bf destroy --deploy-mode=bundle -p \"<profile>\" -wd <workflows directory>\n
"},{"location":"environment-variables/","title":"ENV Variables","text":""},{"location":"environment-variables/#environment-variables","title":"Environment Variables","text":""},{"location":"environment-variables/#note-cdktf-is-deprecated-please-keep-in-mind-as-you-read-the-list","title":"Note: CDKTF is deprecated please keep in mind as you read the list","text":"Environment Variable Default Value Deploment Mode Support Description BRICKFLOW_ENV local bundle & cdktf (deprecated) The environment name for Brickflow BRICKFLOW_FORCE_DEPLOY False cdktf (deprecated) Flag indicating whether to force deployment BRICKFLOW_DEPLOYMENT_MODE cdktf (deprecated) bundle & cdktf (deprecated) The deployment mode for Brickflow (cdktf, bundles) BRICKFLOW_GIT_REPO N/A bundle & cdktf (deprecated) The URL of the Git repository for Brickflow BRICKFLOW_GIT_REF N/A bundle & cdktf (deprecated) The Git reference (branch, tag, commit) for Brickflow BRICKFLOW_GIT_PROVIDER github bundle & cdktf (deprecated) The Git provider (e.g., GitHub, GitLab) for Brickflow DATABRICKS_CONFIG_PROFILE default bundle & cdktf (deprecated) The profile name for Databricks configuration BRICKFLOW_DEPLOY_ONLY_WORKFLOWS N/A bundle & cdktf (deprecated) List of workflows to deploy exclusively BRICKFLOW_WORKFLOW_PREFIX N/A bundle & cdktf (deprecated) Prefix to add to workflow names during deployment BRICKFLOW_WORKFLOW_SUFFIX N/A bundle & cdktf (deprecated) Suffix to add to workflow names during deployment BRICKFLOW_S3_BACKEND_BUCKET N/A cdktf (deprecated) The name of the S3 bucket for Brickflow backend BRICKFLOW_S3_BACKEND_KEY N/A cdktf (deprecated) The key or path in the S3 bucket for Brickflow backend BRICKFLOW_S3_BACKEND_REGION N/A cdktf (deprecated) The AWS region for the S3 backend BRICKFLOW_S3_BACKEND_DYNAMODB_TABLE N/A cdktf (deprecated) The DynamoDB table name for tracking S3 backend BRICKFLOW_INTERACTIVE_MODE True bundle & cdktf (deprecated) Flag indicating whether to enable interactive mode BRICKFLOW_BUNDLE_BASE_PATH /Users/${workspace.current_user.userName} bundle The base path for the bundle in the S3 backend BRICKFLOW_BUNDLE_OBJ_NAME .brickflow_bundles bundle The name of the folder post appended to your base path BRICKFLOW_BUNDLE_CLI_EXEC databricks bundle The executable command for bundle execution. By default it will be downloaded on the fly. BRICKFLOW_BUNDLE_NO_DOWNLOAD False bundle Flag indicating whether to skip downloading the databricks bundle cli. Useful if you are in locked down network. BRICKFLOW_BUNDLE_CLI_VERSION 0.200.0 bundle The version of the bundle CLI tool BRICKFLOW_MONOREPO_PATH_TO_BUNDLE_ROOT N/A bundle & cdktf (deprecated) The path to the bundle root directory in a monorepo. Default assumes you are not using a monorepo"},{"location":"environment-variables/#workflow-prefixing-or-suffixing","title":"Workflow prefixing or suffixing","text":"

This allows for adding suffixes or prefixes in the name of the workflow:

  • BRICKFLOW_WORKFLOW_PREFIX
  • BRICKFLOW_WORKFLOW_SUFFIX

Setting the above is semantically the same as doing this in code:

wf = Workflow(\n\"thanks\",\nprefix=\"so_long_\",  # same as BRICKFLOW_WORKFLOW_PREFIX\nsuffix=\"_and_thanks_for_all_the_fish\"  # same as BRICKFLOW_WORKFLOW_SUFFIX\n)\n

wf.name would then result in "so_long_thanks_and_thanks_for_all_the_fish"

This is to allow 'unique' names when deploying the same workflow to the same environment while still keeping the deployments separate.

For example, consider this scenario:

  • You have a workflow named inventory_upsert;
  • Two features are being developed on in parallel in the DEV environment, let's name these feature_1 and feature_2;
  • If you don't have the ability to uniquely set the name for a workflow, the workflow you are creating in dev (no matter in which feature/branch they originate from) will always be named dev_inventory_upsert;
  • with using the prefix/suffix mechanism, we can set a ENV variable and end up with unique names for each feature, i.e. dev_inventory_upsert_feature_1 and dev_inventory_upsert_feature_2.

Ideal usage for this is in CI/CD pipelines.

"},{"location":"highlevel/","title":"HighLevel","text":""},{"location":"highlevel/#brickflow-overview","title":"Brickflow Overview","text":"

The objective of Brickflow is to provide a thin layer on top of databricks workflows to help deploy and manage workflows in Databricks. It also provides plugins/extras to be able to run airflow operators directly in the workflows.

"},{"location":"highlevel/#brickflow-to-airflow-term-mapping","title":"Brickflow to Airflow Term Mapping","text":"Object Airflow Brickflow Collection of Workflows Airflow Cluster (Airflow Dag Bag) Project/Entrypoint Workflow Airflow Dag Workflow Task Airflow Operator Task Schedule Unix Cron Quartz Cron Inter Task Communication XComs Task Values Connections to External Services Airflow Connections Cerberus Connection Builder Variables to Tasks Variables Task Parameters Context values (execution_date, etc.) Airflow Macros, context[\"ti\"] ctx.<task parameter>"},{"location":"limitations/","title":"Limitations","text":"
  • Docs (WIP)
  • Python wheel tasks
  • Support for DBSQL tasks using SQL Warehouses
  • CLI for visualizing workflow locally using a graphing tool
"},{"location":"projects/","title":"Projects","text":"

The project is similar to a map cluster: it can be composed of various different Workflows or dags.

Here is an example of an entrypoint. Click the plus buttons to understand all the parts of the entrypoint file.

entrypoint.py
# Databricks notebook source  # (1)!\nimport examples.brickflow_examples.workflows\nfrom brickflow import Project, PypiTaskLibrary, MavenTaskLibrary\nARTIFACTORY = \"\"\ndef main() -> None:\n\"\"\"Project entrypoint\"\"\"\nwith Project(\n\"brickflow-demo\",  # (3)!\ngit_repo=\"https://github.com/Nike-Inc/brickflow\",  # (4)!\nprovider=\"github\",  # (5)!\nlibraries=[  # (6)!\nPypiTaskLibrary(package=\"brickflow==1.0.0 --extra-index-url \" + ARTIFACTORY),\nMavenTaskLibrary(coordinates=\"com.cronutils:cron-utils:9.2.0\"),\n],\n) as f:\nf.add_pkg(examples.brickflow_examples.workflows)  # (7)!\nif __name__ == \"__main__\":  # (2)!\nmain()\n
  1. Uploading this Python file into databricks with this comment on the first line treats the python file as a notebook.
  2. This makes sure this only runs when this file is run via python entrypoint.py
  3. This is the project name you provided when you do bf init
  4. This is the git repo that is introspected when running bf init
  5. This is the github provider that you decide on.
  6. You can provide a list of packages that need to be installed in all of your clusters when running ETL.
  7. You can add multiple packages in your project where you are defining workflows.
"},{"location":"quickstart/","title":"CDKTF (Deprecated)","text":""},{"location":"quickstart/#prerequisites","title":"Prerequisites","text":"
  1. You need either of the following installed:
  2. Install via docker:
    1. Docker installed on your laptop
  3. Install Locally (optional):
    1. Python >= 3.8
    2. Install nodejs == 18.14.0
    3. Install terraform 1.3.1
    4. Install cerberus-python-client
  4. Configure your github integration to your repos using SSH.
  5. Configure the databricks cli cfg file. pip install databricks-cli and then databricks configure -t which will configure the databricks cli with a token.
"},{"location":"quickstart/#install-via-docker","title":"Install Via Docker","text":"

We recommend using a docker container for development, as it's easier to pick up version upgrades by changing the docker image version.

  • Add the following alias to your profile or zsh_profile:

    alias bfs='docker run -it --rm --name brickflow -v \"$PWD\":/usr/src/brickflow -v ~/.databrickscfg:/root/.databrickscfg:ro -v ~/.ssh:/root/.ssh:ro -w /usr/src/brickflow <DOCKERHUB_URL_REPLACE>/brickflow:latest'\n
  • Please change your directory to the root of your project. Then run the bfs command.

    bfs\n
  • This will launch the bash shell inside the container. It will do the following:

    1. Mount your current working directory as read-write to the working directory in the container.
    2. Mount your ~/.ssh directory as read-only to the ~/.ssh in the container.
    3. Mount your ~/.databrickscfg file as read-only to the ~/.databrickscfg in the container.
  • You will also need to install any required packages of your respective project inside the docker container.

"},{"location":"quickstart/#upgrade-the-brickflow-container","title":"Upgrade the brickflow container","text":"
  • If the brickflow version in your container is outdated and needs to be upgraded, run the command below in your shell to pull the latest docker image

    docker pull <DOCKERHUB_URL_REPLACE>/brickflow:latest\n
"},{"location":"quickstart/#install-locally-optional-if-you-choose-not-to-use-docker","title":"Install locally (optional if you choose not to use docker)","text":"

Alternatively, instead of docker you can install locally, but you will need to resolve all the dependencies yourself.

The project relies on terraform and cdktf to deploy your python projects.

  1. Install brew if not installed already using - brew-install
  2. Install node using brew install node
  3. Install cdktf-cli via npm install -g cdktf-cli
  4. Install the brickflow package via pip install brickflow[deploy]
  5. Install the cerberus if needed via pip install brickflow[cerberus]
  6. Install the airflow if needed via pip install brickflow[airflow]
"},{"location":"quickstart/#setup-project","title":"Setup Project","text":"
  • The first step is to initialize the project. It will do the following:

    1. Create the entrypoint.py file in your workflows module.
    2. Update your .gitignore file with the correct directories to ignore.
  • To initialize the project inside the bfs shell run:

    bf init\n
  • It will prompt you for the:

    1. Project Name
    2. Git https url of your project
    3. Workflows Directory Path
    4. Brickflow Version
    5. Spark Expectations Version
"},{"location":"tasks/","title":"Tasks","text":"

A task in Databricks workflows refers to a single unit of work that is executed as part of a larger data processing pipeline. Tasks are typically designed to perform a specific set of operations on data, such as loading data from a source, transforming the data, and storing it in a destination. In brickflow, tasks are designed in the same way: they are plain python functions registered on a workflow.

The following assumes that the workflows section has already been read and a workflow object has been created.

"},{"location":"tasks/#task","title":"Task","text":"

Databricks workflow task can be created by decorating a python function with brickflow's task function

task
from brickflow import Workflow\nwf = Workflow(...)\n@wf.task  # (1)!\ndef start():\npass\n@wf.task(name=\"custom_end\")  # (2)!\ndef end():\npass\n
  1. Create a task using a decorator pattern. The task name would default to the python function name. So a task will be created with the name \"start\"
  2. Creating a task and defining the task name explicitly instead of using the function name \"end\". The task will be created with the new name \"custom_end\"
"},{"location":"tasks/#task-dependency","title":"Task dependency","text":"

Define a task dependency by using the "depends_on" parameter of the task decorator. You can provide the dependent tasks as direct python callables, strings, or a list of callables/strings.

task_dependency
from brickflow import Workflow\nwf = Workflow(...)\n@wf.task\ndef start():\npass\n@wf.task(depends_on=start)  # (1)!\ndef bronze_layer():\npass\n@wf.task(depends_on=\"bronze_layer\")  # (2)!\ndef x_silver():\npass\n@wf.task(depends_on=bronze_layer)\ndef y_silver():\npass\n@wf.task(depends_on=[x_silver, y_silver])  # (3)!\ndef xy_gold():\npass\n@wf.task(name=\"custom_z_gold\", depends_on=[x_silver, \"y_silver\"])  # (4)!\ndef z_gold():\npass\n@wf.task(depends_on=[\"xy_gold\", \"custom_z_gold\"])  # (5)!\ndef end():\npass\n
  1. Create dependency on task \"start\" and it is passed as callable
  2. Create dependency on task \"bronze_layer\" and it is passed as a string
  3. Create dependency on multiple tasks using list and the tasks are callables
  4. Create dependency on multiple tasks using list but one task is a callable and another is a string
  5. Create dependency on multiple tasks using list and tasks are passed as string. \"custom_z_gold\" is the task name that is explicitly defined - should not use \"z_gold\" which is a function name
"},{"location":"tasks/#task-parameters","title":"Task parameters","text":"

Task parameters can be defined as key value pairs in the function definition on which task is defined

task_parameters
from brickflow import Workflow\nwf = Workflow(...)\n@wf.task\ndef task_function(*, test=\"var\", test1=\"var1\"):  # (1)!\nprint(test)\nprint(test1)\n
  1. To pass the task specific parameters, need to start with \"*\" and then key value pairs start
"},{"location":"tasks/#common-task-parameters","title":"Common task parameters","text":"

In the workflows section, we saw how the common task parameters are created at the workflow level. Now in this section, we shall see how to use the common task parameters

use_common_task_parameters
from brickflow import Workflow, ctx\nwf = Workflow(...)\n@wf.task\ndef common_params():\nimport some_pyspark_function  # (1)!\ncatalog_env = ctx.dbutils_widget_get_or_else(key=\"catalog\", debug=\"local\")  # (2)!\nsome_pyspark_function(catalog_env)  # (3)!\n
  1. It is recommended to use localized imports in tasks rather than the global imports
  2. Brickflow provides the context using which we can fetch the task parameters that are defined. Providing debug is mandatory or else there will be a compilation error while deploying
  3. The extracted task_parameter_value can be used as any python variable. In this example, we are just passing the variable to \"some_pyspark_function\"
"},{"location":"tasks/#inbuilt-task-parameters","title":"Inbuilt task parameters","text":"

There are many inbuilt task parameters that can be accessed using the brickflow context, as shown above.

inbuilt_task_parameters
from brickflow import Workflow, ctx\nwf = Workflow(...)\n@wf.task\ndef inbuilt_params():\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_env\",  # (1)! \ndebug=\"local\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_run_id\",  # (2)! \ndebug=\"788868\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_job_id\",  # (3)! \ndebug=\"987987987987987\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_start_date\",  # (4)! \ndebug=\"2023-05-03\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_start_time\",  # (5)! \ndebug=\"1683102411626\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_task_retry_count\",  # (6)! \ndebug=\"2\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_parent_run_id\",  # (7)! \ndebug=\"788869\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_task_key\",  # (8)! \ndebug=\"inbuilt_params\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_internal_workflow_name\",  # (9)! \ndebug=\"Sample_Workflow\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_internal_task_name\",  # (10)! \ndebug=\"inbuilt_params\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_internal_workflow_prefix\",  # (11)! \ndebug=\"inbuilt_params\"))\nprint(ctx.dbutils_widget_get_or_else(\nkey=\"brickflow_internal_workflow_suffix\",  # (12)! \ndebug=\"inbuilt_params\"))\n
  1. \"brickflow_env\" holds the value of the --env variable which was used when brickflow is deployed
  2. \"brickflow_run_id\" holds the value of the current task run id
  3. \"brickflow_job_id\" holds the value of the current workflow job id
  4. \"brickflow_start_date\" holds the value of the current workflow start date
  5. \"brickflow_start_time\" holds the value of the current task start time
  6. \"brickflow_task_retry_count\" holds the value of number of retries a task can run, when a failure occurs
  7. \"brickflow_parent_run_id\" hold the value of the current workflow run_id
  8. \"brickflow_task_key\" holds the value of the current task name
  9. \"brickflow_internal_workflow_name\" holds the value of the current workflow name
  10. \"brickflow_internal_task_name\" holds the value of the current task name
  11. \"brickflow_internal_workflow_prefix\" holds the value of the prefix used for the current workflow name
  12. \"brickflow_internal_workflow_suffix\" holds the value of the suffix used for the current workflow name
"},{"location":"tasks/#clusters","title":"Clusters","text":"

There is flexibility to use a different cluster for each task or to assign custom clusters.

clusters
from brickflow import Workflow, Cluster\nwf = Workflow(...)\n@wf.task(cluster=Cluster(...))  # (1)!\ndef custom_cluster():\npass\n
  1. You will be able to create a job cluster or use existing cluster. Refer to this section in the workflows to understand how to implement
"},{"location":"tasks/#libraries","title":"Libraries","text":"

There is flexibility to use specific libraries for a particular task.

libraries
from brickflow import Workflow\nwf = Workflow(...)\n@wf.task(libraries=[...])  # (1)!\ndef custom_libraries():\npass\n
  1. You will be able to install libraries that are specific to a task. Refer to this section in the workflows to understand how to implement
"},{"location":"tasks/#task-types","title":"Task types","text":"

There are different task types that are supported by brickflow right now. The default task type that is used by brickflow is NOTEBOOK

task_types
from brickflow import Workflow, TaskType, BrickflowTriggerRule, TaskResponse\nwf = Workflow(...)\n@wf.task\ndef notebook_task():\npass\n@wf.task(task_type=TaskType.DLT)\ndef dlt_task():\npass\n@wf.task(\ntask_type=TaskType.CUSTOM_PYTHON_TASK,  # (1)!\ntrigger_rule=BrickflowTriggerRule.NONE_FAILED,  # (2)!\ncustom_execute_callback=lambda x: TaskResponse(x.name, \npush_return_value=True),  # (3)!\n)\ndef custom_python_task():\npass\n
  1. Provide the task type that is to be used for this task. Default is a notebook task
  2. A trigger rule can be attached. It can be ALL_SUCCESS or NONE_FAILED. In this case, the task will be triggered if all the upstream tasks have at least run to completion.
  3. Custom function that have to be executed as a call back. \"push_return_value\" will assign the output to task values. Task values can be compared to xcom values in airflow
"},{"location":"tasks/#trigger-rules","title":"Trigger rules","text":"

There are two types of trigger rules that can be applied on a task. It can be either ALL_SUCCESS or NONE_FAILED

task_types
from brickflow import Workflow, BrickflowTriggerRule\nwf = Workflow(...)\n@wf.task(\ntrigger_rule=BrickflowTriggerRule.NONE_FAILED  # (1)!\n)\ndef none_failed_task():\npass\n@wf.task(\ntrigger_rule=BrickflowTriggerRule.ALL_SUCCESS  # (2)!\n)\ndef all_success_task():\npass\n
  1. NONE_FAILED - use this if you want to trigger the task irrespective of the upstream tasks success or failure state
  2. ALL_SUCCESS - use this if you want to trigger the task only if all the upstream tasks are all having success state
"},{"location":"tasks/#operators","title":"Operators","text":"

We have adopted/extended certain airflow operators that might be needed to run as a task in databricks workflows. For airflow operators, the task function returns the operator, and brickflow executes it based on the task's return type.

"},{"location":"tasks/#bash-operator","title":"Bash Operator","text":"

You will be able to use bash operator as below

bash_operator
from brickflow import Workflow\nfrom brickflow_plugins import BashOperator\nwf = Workflow(...)\n@wf.task\ndef bash_task():\nreturn BashOperator(task_id=bash_task.__name__, \nbash_command=\"ls -ltr\")  # (1)!\n
  1. Use BashOperator just like in airflow, but it has to be returned from the task function.
"},{"location":"tasks/#task-dependency-sensor","title":"Task Dependency Sensor","text":"

Even if you migrate to databricks workflows, brickflow gives you the flexibility to have a dependency on the airflow job

task_dependency_sensor
from brickflow import Workflow, ctx\nfrom brickflow_plugins import TaskDependencySensor\nwf = Workflow(...)\n@wf.task\ndef airflow_external_task_dependency_sensor():\nimport base64\ndata = base64.b64encode(\nctx.dbutils.secrets.get(\"brickflow-demo-tobedeleted\", \"okta_conn_id\").encode(\n\"utf-8\"\n)\n).decode(\"utf-8\")\nreturn TaskDependencySensor(\ntask_id=\"sensor\",\ntimeout=180,\nokta_conn_id=f\"b64://{data}\",\nexternal_dag_id=\"external_airlfow_dag\",\nexternal_task_id=\"hello\",\nallowed_states=[\"success\"],\nexecution_delta=None,\nexecution_delta_json=None,\ncluster_id=\"your_cluster_id\",\n)\n
"},{"location":"workflows/","title":"Workflows","text":"

A Workflow is similar to an Airflow dag that lets you encapsulate a set of tasks.

Here is an example of a workflow. Click the plus buttons to understand all the parts of the workflow file.

workflow.py
from datetime import timedelta\nfrom brickflow import Workflow, Cluster, WorkflowPermissions, User, \\\n    TaskSettings, EmailNotifications, PypiTaskLibrary, MavenTaskLibrary\nwf = Workflow(  # (1)!\n\"wf_test\",  # (2)!\ndefault_cluster=Cluster.from_existing_cluster(\"your_existing_cluster_id\"),  # (3)!\n# Optional parameters below\nschedule_quartz_expression=\"0 0/20 0 ? * * *\",  # (4)!\ntimezone=\"UTC\",  # (5)!\ndefault_task_settings=TaskSettings(  # (6)!\nemail_notifications=EmailNotifications(\non_start=[\"email@nike.com\"],\non_success=[\"email@nike.com\"],\non_failure=[\"email@nike.com\"]\n),\ntimeout_seconds=timedelta(hours=2).seconds\n),\nlibraries=[  # (7)!\nPypiTaskLibrary(package=\"requests\"),\nMavenTaskLibrary(coordinates=\"com.cronutils:cron-utils:9.2.0\"),\n],\ntags={  # (8)!\n\"product_id\": \"brickflow_demo\",\n\"slack_channel\": \"nike-sole-brickflow-support\"\n},\nmax_concurrent_runs=1,  # (9)!\npermissions=WorkflowPermissions(  # (10)!\ncan_manage_run=[User(\"abc@abc.com\")],\ncan_view=[User(\"abc@abc.com\")],\ncan_manage=[User(\"abc@abc.com\")],\n),\nprefix=\"feature-jira-xxx\",  # (11)!\nsuffix=\"_qa1\",  # (12)!\ncommon_task_parameters={  # (13)!\n\"catalog\": \"development\",\n\"database\": \"your_database\"\n},\n)\n@wf.task()  # (14)!\ndef task_function(*, test=\"var\"):\nreturn \"hello world\"\n
  1. Workflow definition which constructs the workflow object
  2. Define the workflow name
  3. The default cluster used for all the tasks in the workflow. This is an all-purpose cluster, but you can also create a job cluster
  4. Cron expression in the quartz format
  5. Define the timezone for your workflow. It is defaulted to UTC
  6. Default task setting that can be used for all the tasks
  7. Libraries that need to be installed for all the tasks
  8. Tags for the resulting workflow and other objects created during the workflow.
  9. Define the maximum number of concurrent runs
  10. Define the permissions on the workflow
  11. Prefix for the name of the workflow
  12. Suffix for the name of the workflow
  13. Define the common task parameters that can be used in all the tasks
  14. Define a workflow task and associate it to the workflow
"},{"location":"workflows/#clusters","title":"Clusters","text":"

There are two ways to define the cluster for the workflow or a task

"},{"location":"workflows/#using-an-existing-cluster","title":"Using an existing cluster","text":"existing_cluster
from brickflow import Cluster\ndefault_cluster=Cluster.from_existing_cluster(\"your_existing_cluster_id\")\n
"},{"location":"workflows/#use-a-job-cluster","title":"Use a job cluster","text":"job_cluster
from brickflow import Cluster\ndefault_cluster=Cluster(\nname=\"your_cluster_name\",\nspark_version='11.3.x-scala2.12',\nnode_type_id='m6g.xlarge',\ndriver_node_type_id='m6g.xlarge',\nmin_workers=1,\nmax_workers=3,\nenable_elastic_disk=True,\npolicy_id='your_policy_id',\naws_attributes={\n\"first_on_demand\": 1,\n\"availability\": \"SPOT_WITH_FALLBACK\",\n\"instance_profile_arn\": \"arn:aws:iam::XXXX:instance-profile/XXXX/group/XX\",\n\"spot_bid_price_percent\": 100,\n\"ebs_volume_type\": \"GENERAL_PURPOSE_SSD\",\n\"ebs_volume_count\": 3,\n\"ebs_volume_size\": 100\n}\n)\n
"},{"location":"workflows/#permissions","title":"Permissions","text":"

Brickflow provides the ability to manage permissions on the workflows. You can grant individual users, a group, or a ServicePrincipal permission to manage, run, or view the workflows.

Below example is for reference

manage_permissions
from brickflow import WorkflowPermissions, User, Group, ServicePrincipal\npermissions=WorkflowPermissions(\ncan_manage_run=[\nUser(\"abc@abc.com\"), \nGroup(\"app.xyz.team.Developer\"), \nServicePrincipal(\"ServicePrinciple_dbx_url.app.xyz.team.Developer\")\n],\ncan_view=[User(\"abc@abc.com\")],\ncan_manage=[User(\"abc@abc.com\")],\n)\n
"},{"location":"workflows/#tags","title":"Tags","text":"

Using brickflow, custom tags can be created on the workflow - but there are also some default tags that are created while the job is deployed.

The default tags that get automatically attached to the workflow are below:

  • \"brickflow_project_name\" : Brickflow Project Name that is referred from the entrypoint.py file
  • \"brickflow_version\" : Brickflow Version that is used to deploy the workflow
  • \"databricks_tf_provider_version\" : Databricks terraform provider version that is used to deploy the workflow
  • \"deployed_by\" : Email id of the profile that is used to deploy the workflow. It can be a user or a service principle. Whichever id is used to deploy the workflow, automatically becomes the owner of the workflow
  • \"environment\" : Environment to which the workflow is identified to

Use the below reference to define more tags and attach to the workflow. These can be used for collecting various metrics and build dashboards.

configure_tags
tags={\n\"product_id\": \"brickflow_demo\",\n\"slack_channel\": \"nike-sole-brickflow-support\"\n}\n
"},{"location":"workflows/#schedule","title":"Schedule","text":"

Databricks workflows use Quartz cron expressions, unlike Airflow's unix-based cron scheduler. A typical Quartz cron expression has six or seven fields, separated by spaces:

second minute hour day_of_month month day_of_week year(optional)\n
Below is a sample

quartz_cron_expression
schedule_quartz_expression=\"0 0/20 0 ? * * *\"\n
"},{"location":"workflows/#tasksettings","title":"Tasksettings","text":"

Task settings at the workflow level can be used to define common settings that apply to all the tasks. Below is a sample for reference; all the parameters in TaskSettings are optional. task_settings

from datetime import timedelta\nfrom brickflow import TaskSettings, EmailNotifications\ndefault_task_settings=TaskSettings(\nemail_notifications=EmailNotifications(\non_start=[\"email@nike.com\"],\non_success=[\"email@nike.com\"],\non_failure=[\"email@nike.com\"]\n),\ntimeout_seconds=timedelta(hours=2).seconds,\nmax_retries=2,\nmin_retry_interval_millis=60000,\nretry_on_timeout=True\n)\n

"},{"location":"workflows/#libraries","title":"Libraries","text":"

Brickflow allows you to specify libraries that need to be installed and used across different tasks. There are many ways to install libraries from different repositories/sources.

libraries
from brickflow import PypiTaskLibrary, MavenTaskLibrary, StorageBasedTaskLibrary, \\\n    JarTaskLibrary, EggTaskLibrary, WheelTaskLibrary\nlibraries=[\nPypiTaskLibrary(package=\"requests\"),\nMavenTaskLibrary(coordinates=\"com.cronutils:cron-utils:9.2.0\"),\nStorageBasedTaskLibrary(\"s3://...\"),\nStorageBasedTaskLibrary(\"dbfs://...\"),\nJarTaskLibrary(\"s3://...\"),\nJarTaskLibrary(\"dbfs://...\"),\nEggTaskLibrary(\"s3://...\"),\nEggTaskLibrary(\"dbfs://...\"),\nWheelTaskLibrary(\"s3://...\"),\nWheelTaskLibrary(\"dbfs://...\"),\n]\n
"},{"location":"workflows/#common-task-parameters","title":"Common task parameters","text":"

Define the common parameters that can be used in all the tasks. Examples could be database name, secrets_id, etc.

common_task_parameters
common_task_parameters={\n\"catalog\": \"development\",\n\"database\": \"your_database\"\n}\n
"},{"location":"api/airflow_external_task_dependency/","title":"AirflowTaskDependencySensor","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks-attributes","title":"Attributes","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks-classes","title":"Classes","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule","title":"brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule-functions","title":"Functions","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule.get_schedule","title":"get_schedule(wf_id: str, **args: str)","text":"

Function that the sensors defined while deriving this class should override.

Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_schedule(self, wf_id: str, **args):\n\"\"\"\n    Function that the sensors defined while deriving this class should\n    override.\n    \"\"\"\nraise Exception(\"Override me.\")\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagSchedule.get_task_run_status","title":"get_task_run_status(wf_id: str, task_id: str, run_date: str = None, cluster_id: str = None, **args: str)","text":"

Function that the sensors defined while deriving this class should override.

Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_task_run_status(\nself, wf_id: str, task_id: str, run_date=None, cluster_id=None, **args\n):\n\"\"\"\n    Function that the sensors defined while deriving this class should\n    override.\n    \"\"\"\nraise Exception(\"Override me.\")\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper","title":"brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper(okta_conn_id: str)","text":"

Bases: MapDagSchedule

Source code in brickflow_plugins/airflow/operators/external_tasks.py
def __init__(self, okta_conn_id: str):\nself._okta_conn: Connection = Connection.get_connection_from_secrets(\nokta_conn_id\n)\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper-functions","title":"Functions","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_access_token","title":"get_access_token() -> str","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_access_token(self) -> str:\nokta_url = self.get_okta_url()\nclient_id = self.get_okta_client_id()\nclient_secret = self.get_okta_client_secret()\nokta_url = os.getenv(\"OKTA_URL\", okta_url)\npayload = (\n\"client_id=\"\n+ client_id\n+ \"&client_secret=\"\n+ client_secret\n+ \"&grant_type=client_credentials\"\n)\nheaders = {\n\"Content-Type\": \"application/x-www-form-urlencoded\",\n\"cache-control\": \"no-cache\",\n}\nresponse = requests.post(okta_url, data=payload, headers=headers, timeout=600)\nif (\nresponse.status_code < HTTPStatus.OK\nor response.status_code > HTTPStatus.PARTIAL_CONTENT\n):\nlog.error(\n\"Failed request to Okta for JWT status_code={} response={} client_id={}\".format(\nresponse.status_code, response.text, client_id\n)\n)\ntoken_data = response.json()[\"access_token\"]\nreturn token_data\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_airflow_api_url","title":"get_airflow_api_url(cluster_id: str) -> str","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_airflow_api_url(self, cluster_id: str) -> str:\n# TODO: templatize this to a env variable\nbase_api_url = f\"https://proxy.us-east-1.map.nike.com/{cluster_id}\"\nreturn base_api_url\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_okta_client_id","title":"get_okta_client_id() -> str","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_okta_client_id(self) -> str:\nreturn self._okta_conn.login\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_okta_client_secret","title":"get_okta_client_secret() -> str","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_okta_client_secret(self) -> str:\nreturn self._okta_conn.get_password()\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_okta_url","title":"get_okta_url() -> str","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_okta_url(self) -> str:\nconn_type = self._okta_conn.conn_type\nhost = self._okta_conn.host\nschema = self._okta_conn.schema\nreturn f\"{conn_type}://{host}/{schema}\"\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_schedule","title":"get_schedule(wf_id: str, **kwargs: str)","text":"

get work flow schedule cron syntax

Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_schedule(self, wf_id: str, **kwargs):\n\"\"\"\n    get work flow schedule cron syntax\n    \"\"\"\nraise Exception(\"Do not have implementation\")\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_task_run_status","title":"get_task_run_status(wf_id: str, task_id: str, run_date: str = None, cluster_id: str = None, **args: str)","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_task_run_status(\nself, wf_id: str, task_id: str, run_date=None, cluster_id=None, **args\n):\ntoken_data = self.get_access_token()\napi_url = self.get_airflow_api_url(cluster_id)\nversion_nr = self.get_version(cluster_id)\ndag_id = wf_id\nheaders = {\n\"Content-Type\": \"application/json\",\n\"cache-control\": \"no-cache\",\n\"Authorization\": \"Bearer \" + token_data,\n}\no_task_status = \"UKN\"\nsession = requests.Session()\nretries = Retry(\ntotal=5, backoff_factor=1, status_forcelist=[502, 503, 504, 500]\n)\nsession.mount(\"https://\", HTTPAdapter(max_retries=retries))\nif version_nr.startswith(\"1.\"):\nlog.info(\"this is 1.x cluster\")\nurl = (\napi_url\n+ \"/api/experimental\"\n+ \"/dags/\"\n+ dag_id\n+ \"/dag_runs/\"\n+ run_date\n+ \"/tasks/\"\n+ task_id\n)\nelse:\nurl = (\napi_url\n+ \"/api/v1/dags/\"\n+ dag_id\n+ \"/dagRuns/scheduled__\"\n+ run_date\n+ \"/taskInstances/\"\n+ task_id\n)\nlog.info(f\"url= {url.replace(' ', '')}\")\nresponse = session.get(url.replace(\" \", \"\"), headers=headers)\nlog.info(\nf\"response.status_code= {response.status_code} response.text= {response.text}\"\n)\nif response.status_code == 200:\nlog.info(f\"response= {response.text}\")\njson_obj = json.loads(response.text)\nif type(json_obj) == dict:\no_task_status = json_obj[\"state\"]\nreturn o_task_status\nreturn o_task_status\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.MapDagScheduleHelper.get_version","title":"get_version(cluster_id: str) -> str","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def get_version(self, cluster_id: str) -> str:\nsession = requests.Session()\nretries = Retry(\ntotal=10, backoff_factor=1, status_forcelist=[502, 503, 504, 500]\n)\nsession.mount(\"https://\", HTTPAdapter(max_retries=retries))\nversion_check_url = (\nself.get_airflow_api_url(cluster_id) + \"/admin/rest_api/api?api=version\"\n)\nlogging.info(version_check_url)\notoken = self.get_access_token()\nheaders = {\"Authorization\": \"Bearer \" + otoken, \"Accept\": \"application/json\"}\nout_version = \"UKN\"\nresponse = session.get(version_check_url, headers=headers, verify=False)\nif response.status_code == HTTPStatus.OK:\nout_version = response.json()[\"output\"]\nlog.info(response.text.encode(\"utf8\"))\nsession.close()\nreturn out_version\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor","title":"brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor(external_dag_id, external_task_id, okta_conn_id, allowed_states = None, execution_delta = None, execution_delta_json = None, cluster_id = None, *args, **kwargs)","text":"

Bases: BaseSensorOperator

Source code in brickflow_plugins/airflow/operators/external_tasks.py
def __init__(\nself,\nexternal_dag_id,\nexternal_task_id,\nokta_conn_id,\nallowed_states=None,\nexecution_delta=None,\nexecution_delta_json=None,\ncluster_id=None,\n*args,\n**kwargs,\n):\nsuper(TaskDependencySensor, self).__init__(*args, **kwargs)\nself.okta_conn_id = okta_conn_id\nself.allowed_states = allowed_states or [\"success\"]\nif execution_delta_json and execution_delta:\nraise Exception(\n\"Only one of `execution_date` or `execution_delta_json` maybe provided to Sensor; not more than one.\"\n)\nself.external_dag_id = external_dag_id\nself.external_task_id = external_task_id\nself.allowed_states = allowed_states\nself.execution_delta = execution_delta\nself.execution_delta_json = execution_delta_json\nself.cluster_id = cluster_id\nself._poke_count = 0\nself.dbx_wf_id = kwargs.get(\"dbx_wf_id\")\n
"},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor-attributes","title":"Attributes","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.allowed_states","title":"allowed_states = allowed_states instance-attribute","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.cluster_id","title":"cluster_id = cluster_id instance-attribute","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.dbx_wf_id","title":"dbx_wf_id = kwargs.get('dbx_wf_id') instance-attribute","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.execution_delta","title":"execution_delta = execution_delta instance-attribute","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.execution_delta_json","title":"execution_delta_json = execution_delta_json instance-attribute","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.external_dag_id","title":"external_dag_id = external_dag_id instance-attribute","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.external_task_id","title":"external_task_id = external_task_id instance-attribute","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.okta_conn_id","title":"okta_conn_id = okta_conn_id instance-attribute","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor-functions","title":"Functions","text":""},{"location":"api/airflow_external_task_dependency/#brickflow_plugins.airflow.operators.external_tasks.TaskDependencySensor.poke","title":"poke(context)","text":"Source code in brickflow_plugins/airflow/operators/external_tasks.py
def poke(self, context):\nlog.info(f\"executing poke.. {self._poke_count}\")\nself._poke_count = self._poke_count + 1\nlogging.info(\"Poking.. {0} round\".format(str(self._poke_count)))\nexec_time = context[\"execution_date\"]\ntask_status = MapDagScheduleHelper(self.okta_conn_id).get_task_run_status(\nwf_id=self.external_dag_id,\ntask_id=self.external_task_id,\nrun_date=exec_time,\ncluster_id=self.cluster_id,\n)\nlog.info(f\"task_status= {task_status}\")\nif task_status not in self.allowed_states:\ncount = 0\nelse:\ncount = 1\nreturn count\n
"},{"location":"api/airflow_native_operators/","title":"AirflowNativeOperators","text":""},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators-attributes","title":"Attributes","text":""},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators-classes","title":"Classes","text":""},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.BashOperatorModifier","title":"brickflow_plugins.airflow.operators.native_operators.BashOperatorModifier","text":"

Bases: OperatorModifier

"},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.BashOperatorModifier-functions","title":"Functions","text":""},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.BashOperatorModifier.modify","title":"modify(operator: BashOperator, task: Task, workflow: Workflow) -> Optional[BashOperator]","text":"Source code in brickflow_plugins/airflow/operators/native_operators.py
@check_if(BashOperator)\ndef modify(\nself, operator: BashOperator, task: Task, workflow: Workflow\n) -> Optional[\"BashOperator\"]:\nf = types.MethodType(_bash_execute, operator)\noperator.execute = f\noperator.on_kill = _bash_empty_on_kill\nreturn operator\n
"},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.BranchPythonOperatorModifier","title":"brickflow_plugins.airflow.operators.native_operators.BranchPythonOperatorModifier","text":"

Bases: OperatorModifier

"},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.BranchPythonOperatorModifier-functions","title":"Functions","text":""},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.BranchPythonOperatorModifier.modify","title":"modify(operator: BranchPythonOperator, task: Task, workflow: Workflow) -> Optional[BranchPythonOperator]","text":"Source code in brickflow_plugins/airflow/operators/native_operators.py
@check_if(BranchPythonOperator)\ndef modify(\nself, operator: BranchPythonOperator, task: Task, workflow: Workflow\n) -> Optional[\"BranchPythonOperator\"]:\nf = types.MethodType(_skip_all_except, operator)\noperator.skip_all_except = f\nreturn operator\n
"},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.ShortCircuitOperatorModifier","title":"brickflow_plugins.airflow.operators.native_operators.ShortCircuitOperatorModifier","text":"

Bases: OperatorModifier

"},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.ShortCircuitOperatorModifier-functions","title":"Functions","text":""},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators.ShortCircuitOperatorModifier.modify","title":"modify(operator: ShortCircuitOperator, task: Task, workflow: Workflow) -> Optional[ShortCircuitOperator]","text":"Source code in brickflow_plugins/airflow/operators/native_operators.py
@check_if(ShortCircuitOperator)\ndef modify(\nself, operator: ShortCircuitOperator, task: Task, workflow: Workflow\n) -> Optional[\"ShortCircuitOperator\"]:\nf = types.MethodType(_short_circuit_execute, operator)\noperator.execute = f\nreturn operator\n
"},{"location":"api/airflow_native_operators/#brickflow_plugins.airflow.operators.native_operators-functions","title":"Functions","text":""},{"location":"api/cli/","title":"CLI","text":""},{"location":"api/cli/#brickflow.cli-attributes","title":"Attributes","text":""},{"location":"api/cli/#brickflow.cli-classes","title":"Classes","text":""},{"location":"api/cli/#brickflow.cli.CdktfCmd","title":"brickflow.cli.CdktfCmd","text":"

Bases: click.Group

"},{"location":"api/cli/#brickflow.cli.CdktfCmd-functions","title":"Functions","text":""},{"location":"api/cli/#brickflow.cli.CdktfCmd.get_command","title":"get_command(ctx: click.Context, cmd_name: str) -> Optional[click.Command]","text":"Source code in brickflow/cli/__init__.py
def get_command(self, ctx: click.Context, cmd_name: str) -> Optional[click.Command]:\nif cmd_name == BrickflowDeployMode.CDKTF.value:\nreturn cdktf_command()\nelif cmd_name == BrickflowDeployMode.BUNDLE.value:\nreturn bundles_proxy_command()\n# elif cmd_name in [\"deploy\", \"diff\"]:\n#     return cdktf_command(cmd_name)\nelse:\nrv = click.Group.get_command(self, ctx, cmd_name)\nif rv is not None:\nreturn rv\nraise ctx.fail(f\"No such command '{cmd_name}'.\")\n
"},{"location":"api/cli/#brickflow.cli-functions","title":"Functions","text":""},{"location":"api/cli/#brickflow.cli.bundle","title":"brickflow.cli.bundle() -> None","text":"

CLI for proxying to databricks bundles cli.

Source code in brickflow/cli/__init__.py
@cli.command\ndef bundle() -> None:\n\"\"\"CLI for proxying to databricks bundles cli.\"\"\"\n# Hack for having bundle show up as a command in brickflow\n# with documentation.\npass  # pragma: no cover\n
"},{"location":"api/cli/#brickflow.cli.bundles_proxy_command","title":"brickflow.cli.bundles_proxy_command() -> click.Command","text":"Source code in brickflow/cli/__init__.py
def bundles_proxy_command() -> click.Command:\ndef run_bundle_command(args: Optional[List[str]] = None, **_: Any) -> None:\nbundle_cli_setup()\nbundle_cli = config(\nBrickflowEnvVars.BRICKFLOW_BUNDLE_CLI_EXEC.value, \"databricks\"\n)\nlog_important_versions(bundle_cli)\nexec_command(bundle_cli, \"bundle\", args or [])\n@click.command(\nname=\"bundles_cmd\",\nshort_help=\"CLI for proxying to the Databricks bundles CLI.\",\ncontext_settings={\"ignore_unknown_options\": True},\nadd_help_option=False,\n)\n@click.argument(\"args\", nargs=-1)\ndef cmd(args: List[str]) -> None:\n# check to make sure you are in project root and then set python path to whole dir\nrun_bundle_command(args=args)\nreturn cmd\n
"},{"location":"api/cli/#brickflow.cli.cdktf","title":"brickflow.cli.cdktf() -> None","text":"

CLI for proxying to the CDKTF CLI.

Source code in brickflow/cli/__init__.py
@cli.command\ndef cdktf() -> None:\n\"\"\"CLI for proxying to the CDKTF CLI.\"\"\"\n# Hack for having cdktf show up as a command in brickflow\n# with documentation.\npass  # pragma: no cover\n
"},{"location":"api/cli/#brickflow.cli.cdktf_command","title":"brickflow.cli.cdktf_command(base_command: Optional[str] = None) -> click.Command","text":"Source code in brickflow/cli/__init__.py
def cdktf_command(base_command: Optional[str] = None) -> click.Command:\n@click.command(\nname=\"cdktf_cmd\",\nshort_help=\"CLI for proxying to CDKTF cli.\",\ncontext_settings={\"ignore_unknown_options\": True},\nadd_help_option=False,\ndeprecated=True,\n)\n@click.argument(\"args\", nargs=-1)\ndef cmd(args: Tuple[str]) -> None:\n# check to make sure you are in project root and then set python path to whole dir\nexec_cdktf_command(base_command, args)\nreturn cmd\n
"},{"location":"api/cli/#brickflow.cli.cdktf_env_set_options","title":"brickflow.cli.cdktf_env_set_options(f: Callable) -> Callable","text":"Source code in brickflow/cli/__init__.py
def cdktf_env_set_options(f: Callable) -> Callable:\ndef local_mode_callback(ctx: click.Context, param: str, value: Any) -> None:  # noqa\n# pylint: disable=unused-argument\nif value is not None and value is True:\n_ilog.info(\n\"Configuring environment to %s...\",\nBrickflowDefaultEnvs.LOCAL.value,\n)\nos.environ[\nBrickflowEnvVars.BRICKFLOW_ENV.value\n] = BrickflowDefaultEnvs.LOCAL.value\ndef deploy_only_workflows(\nctx: click.Context, param: str, value: Any\n) -> None:  # noqa\n# pylint: disable=unused-argument\nif value:\nfor file in value:\nif file[-3:] != \".py\":\nraise ClickException(\"Should pass only python files as workflows\")\n_ilog.info(\"Brickflow will only deploy workflows: %s\", \", \".join(value))\nos.environ[\nBrickflowEnvVars.BRICKFLOW_DEPLOY_ONLY_WORKFLOWS.value\n] = \",\".join(value)\ndef set_up_cdktf_for_workflow_dir(\nctx: click.Context, param: str, value: Any  # noqa\n) -> None:\nif value is not None:\nreturn value\noptions = [\nclick.option(\n\"--local-mode\",\n\"-l\",\nis_flag=True,\ncallback=local_mode_callback,\nhelp=\"Set the environment flag to local; certain other components [TBD] are disabled in local mode.\",\n),\nclick.option(\n\"--workflows-dir\",\n\"-wd\",\ntype=click.Path(exists=True, file_okay=False),\nprompt=INTERACTIVE_MODE,\ncallback=set_up_cdktf_for_workflow_dir,\nhelp=\"Provide the workflow directory to deploy.\",\n),\nclick.option(\n\"--workflow\",\n\"-w\",\ntype=str,\nmultiple=True,\ncallback=deploy_only_workflows,\nhelp=\"\"\"Provide the workflow file names to deploy, separated by spaces.\n                    Example: bf deploy -p DEFAULT -l -w wf1.py -w wf2.py\"\"\",\n),\nclick.option(\n\"--env\",\n\"-e\",\ndefault=BrickflowDefaultEnvs.LOCAL.value,\ntype=str,\ncallback=bind_env_var(BrickflowEnvVars.BRICKFLOW_ENV.value),\nhelp=\"Set the environment value; certain tags [TBD] get added to the workflows based on this value.\",\n),\nclick.option(\n\"--repo-url\",\n\"-r\",\ndefault=None,\ntype=str,\ncallback=bind_env_var(BrickflowEnvVars.BRICKFLOW_GIT_REPO.value),\nhelp=\"The GitHub repo URL to run brickflow with.\",\n),\nclick.option(\n\"--git-ref\",\ndefault=None,\ntype=str,\ncallback=bind_env_var(BrickflowEnvVars.BRICKFLOW_GIT_REF.value),\nhelp=\"The commit/tag/branch to use in GitHub.\",\n),\nclick.option(\n\"--git-provider\",\ndefault=None,\ntype=str,\ncallback=bind_env_var(BrickflowEnvVars.BRICKFLOW_GIT_PROVIDER.value),\nhelp=\"The Git provider for brickflow; this is used for configuring Git on DBX jobs.\",\n),\nclick.option(\n\"--profile\",\n\"-p\",\ndefault=None,\ntype=str,\ncallback=bind_env_var(\nBrickflowEnvVars.BRICKFLOW_DATABRICKS_CONFIG_PROFILE.value\n),\nhelp=\"The Databricks profile to use for authenticating to Databricks during deployment.\",\n),\n]\nfor option in options:\nf = option(f)\nreturn f\n
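The pattern here, applying a list of click.option decorators to a function in a loop, is a common way to share flags across several commands. A trimmed-down sketch (option names and the command are illustrative):

import click

def common_options(f):
    # apply shared flags to any command, mirroring the loop at the end
    # of cdktf_env_set_options
    options = [
        click.option("--env", "-e", default="local", help="Target environment."),
        click.option("--local-mode", "-l", is_flag=True, help="Run in local mode."),
    ]
    for option in options:
        f = option(f)
    return f

@click.command()
@common_options
def deploy(env: str, local_mode: bool) -> None:
    click.echo(f"env={env} local={local_mode}")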
"},{"location":"api/cli/#brickflow.cli.cli","title":"brickflow.cli.cli() -> None","text":"

CLI for managing Databricks Workflows

Source code in brickflow/cli/__init__.py
@click.group(invoke_without_command=True, no_args_is_help=True, cls=CdktfCmd)\n@click.version_option(prog_name=\"brickflow\")\ndef cli() -> None:\n\"\"\"CLI for managing Databricks Workflows\"\"\"\n
"},{"location":"api/cli/#brickflow.cli.deploy","title":"brickflow.cli.deploy(**kwargs: Any) -> None","text":"

CLI for deploying workflow projects.

Source code in brickflow/cli/__init__.py
@cli.command\n@click.option(\n\"--auto-approve\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Auto approve brickflow pipeline without being prompted to approve.\",\n)\n@click.option(\n\"--deploy-mode\",\ntype=click.Choice([\"cdktf\", \"bundle\"]),\nshow_default=True,\ndefault=\"cdktf\",\nhelp=\"Which deployment framework to use to deploy.\",\n)\n@click.option(\n\"--force-acquire-lock\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Force acquire lock for databricks bundles deploy.\",\n)\n@cdktf_env_set_options\ndef deploy(**kwargs: Any) -> None:\n\"\"\"CLI for deploying workflow projects.\"\"\"\n# Hack for having cdktf show up as a command in brickflow\n# with documentation.\ndeploy_mode = get_deployment_mode(**kwargs)\nif deploy_mode == BrickflowDeployMode.CDKTF:\nmake_cdktf_json(**kwargs)\nexec_cdktf_command(\"deploy\", get_cdktf_specific_args(**kwargs))\nelse:\ndisable_project_name_in_env()\nbundle_deploy(**kwargs)\n
"},{"location":"api/cli/#brickflow.cli.destroy","title":"brickflow.cli.destroy(**kwargs: Any) -> None","text":"

CLI for destroying workflow projects.

Source code in brickflow/cli/__init__.py
@cli.command\n@click.option(\n\"--auto-approve\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Auto approve brickflow pipeline without being prompted to approve.\",\n)\n@click.option(\n\"--deploy-mode\",\ntype=click.Choice([\"cdktf\", \"bundle\"]),\nshow_default=True,\ndefault=\"cdktf\",\nhelp=\"Which deployment framework to use to deploy.\",\n)\n@click.option(\n\"--force-acquire-lock\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Force acquire lock for databricks bundles destroy.\",\n)\n@cdktf_env_set_options\ndef destroy(**kwargs: Any) -> None:\n\"\"\"CLI for destroying workflow projects.\"\"\"\n# Hack for having cdktf show up as a command in brickflow\n# with documentation.\ndeploy_mode = get_deployment_mode(**kwargs)\nif deploy_mode == BrickflowDeployMode.CDKTF:\nmake_cdktf_json(**kwargs)\nexec_cdktf_command(\"destroy\", get_cdktf_specific_args(**kwargs))\nelse:\ndisable_project_name_in_env()\nbundle_destroy(**kwargs)\n
"},{"location":"api/cli/#brickflow.cli.diff","title":"brickflow.cli.diff(**kwargs: Any) -> None","text":"

CLI for identifying diff in projects (only cdktf supported).

Source code in brickflow/cli/__init__.py
@cli.command\n@cdktf_env_set_options\ndef diff(**kwargs: Any) -> None:\n\"\"\"CLI for identifying diff in projects (only cdktf supported).\"\"\"\n# Only the CDKTF deployment mode supports diff.\nmake_cdktf_json(**kwargs)\nexec_cdktf_command(\"diff\", [])\n
"},{"location":"api/cli/#brickflow.cli.disable_project_name_in_env","title":"brickflow.cli.disable_project_name_in_env() -> None","text":"Source code in brickflow/cli/__init__.py
def disable_project_name_in_env() -> None:\n# TODO: delete this when deploy commands are gone\n# used for legacy bundles deploy and destroy commands\n# disable multiple projects in same directory\nos.environ[BrickflowEnvVars.BRICKFLOW_USE_PROJECT_NAME.value] = \"False\"\n
"},{"location":"api/cli/#brickflow.cli.docs","title":"brickflow.cli.docs() -> None","text":"

Open the Brickflow docs in your browser.

Source code in brickflow/cli/__init__.py
@cli.command\ndef docs() -> None:\n\"\"\"Open the Brickflow docs in your browser.\"\"\"\ndocs_site = \"https://verbose-garbanzo-6b8a1ae2.pages.github.io/\"\nwebbrowser.open(docs_site, new=2)\nclick.echo(f\"Opening browser for docs... site: {docs_site}\")\n
"},{"location":"api/cli/#brickflow.cli.get_cdktf_specific_args","title":"brickflow.cli.get_cdktf_specific_args(**kwargs: Dict[str, Any]) -> List[str]","text":"Source code in brickflow/cli/__init__.py
def get_cdktf_specific_args(**kwargs: Dict[str, Any]) -> List[str]:\nargs = []\nif kwargs.get(\"auto_approve\", False) is True:\nargs.append(\"--auto-approve\")\nreturn args\n
"},{"location":"api/cli/#brickflow.cli.get_deployment_mode","title":"brickflow.cli.get_deployment_mode(**kwargs: Dict[str, Any]) -> BrickflowDeployMode","text":"Source code in brickflow/cli/__init__.py
def get_deployment_mode(**kwargs: Dict[str, Any]) -> BrickflowDeployMode:\n# set deployment mode for cdktf or bundle\nos.environ[BrickflowEnvVars.BRICKFLOW_DEPLOYMENT_MODE.value] = str(\nkwargs.get(\"deploy_mode\", BrickflowDeployMode.CDKTF.value)\n)\nif (\nkwargs.get(\"deploy_mode\", BrickflowDeployMode.CDKTF.value)\n== BrickflowDeployMode.CDKTF.value\n):\nreturn BrickflowDeployMode.CDKTF\nelse:\nreturn BrickflowDeployMode.BUNDLE\n
"},{"location":"api/cli/#brickflow.cli.make_cdktf_json","title":"brickflow.cli.make_cdktf_json(**kwargs: Any) -> None","text":"Source code in brickflow/cli/__init__.py
def make_cdktf_json(**kwargs: Any) -> None:\nwd: Optional[str] = kwargs.get(\"workflows_dir\")\nif wd is None:\nraise ValueError(\n\"workflows_dir not set, please set it using --workflows-dir or -wd\"\n)\nidempotent_cdktf_out(wd)\n
"},{"location":"api/cli/#brickflow.cli.sync","title":"brickflow.cli.sync(**kwargs: Any) -> None","text":"

Synchronize your bundle tree to the Databricks workspace (only supported by the bundle deployment mode).

Source code in brickflow/cli/__init__.py
@cli.command\n@click.option(\n\"--deploy-mode\",\ntype=click.Choice([\"bundle\"]),\nshow_default=True,\ndefault=\"bundle\",\nhelp=\"Which deployment framework to use to deploy.\",\n)\n@click.option(\n\"--watch\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Enable filewatcher to sync files over.\",\n)\n@click.option(\n\"--full\",\ntype=bool,\nis_flag=True,\nshow_default=True,\ndefault=False,\nhelp=\"Run a full sync.\",\n)\n@click.option(\n\"--interval-duration\",\ntype=str,\nshow_default=True,\ndefault=None,\nhelp=\"File system polling interval (for --watch).\",\n)\n@click.option(\n\"--debug\",\ntype=str,\nshow_default=True,\ndefault=None,\nhelp=\"File system polling interval (for --watch).\",\n)\n@cdktf_env_set_options\ndef sync(**kwargs: Any) -> None:\n\"\"\"Synchronize your bundle tree to databricks workspace (only supported by bundle deployment mode).\"\"\"\ndeploy_mode = get_deployment_mode(**kwargs)\nif deploy_mode == BrickflowDeployMode.BUNDLE:\nbundle_sync(**kwargs)\nelse:\nraise ClickException(\n\"Unsupported deploy mode for sync; currently only supports bundle deploy mode.\"\n)\n
"},{"location":"api/compute/","title":"Compute","text":""},{"location":"api/compute/#brickflow.engine.compute-classes","title":"Classes","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster","title":"brickflow.engine.compute.Cluster dataclass","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster-attributes","title":"Attributes","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.aws_attributes","title":"aws_attributes: Optional[Dict[str, Any]] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.custom_tags","title":"custom_tags: Optional[Dict[str, str]] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.data_security_mode","title":"data_security_mode: str = DataSecurityMode.SINGLE_USER class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.dlt_auto_scale_mode","title":"dlt_auto_scale_mode: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.driver_instance_pool_id","title":"driver_instance_pool_id: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.driver_node_type_id","title":"driver_node_type_id: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.enable_elastic_disk","title":"enable_elastic_disk: Optional[bool] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.existing_cluster_id","title":"existing_cluster_id: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.init_scripts","title":"init_scripts: Optional[List[Dict[str, str]]] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.instance_pool_id","title":"instance_pool_id: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.is_new_job_cluster","title":"is_new_job_cluster: bool property","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.job_task_field_dict","title":"job_task_field_dict: Dict[str, str] property","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.max_workers","title":"max_workers: Optional[int] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.min_workers","title":"min_workers: Optional[int] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.name","title":"name: str instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.node_type_id","title":"node_type_id: str instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.num_workers","title":"num_workers: Optional[int] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.policy_id","title":"policy_id: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.runtime_engine","title":"runtime_engine: Optional[Literal['STANDARD', 'PHOTON']] = None class-attribute 
instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.spark_conf","title":"spark_conf: Optional[Dict[str, str]] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.spark_env_vars","title":"spark_env_vars: Optional[Dict[str, str]] = None class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.spark_version","title":"spark_version: str instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster-functions","title":"Functions","text":""},{"location":"api/compute/#brickflow.engine.compute.Cluster.__hash__","title":"__hash__() -> int","text":"Source code in brickflow/engine/compute.py
def __hash__(self) -> int:\n# dedupe dicts and lists which are default un hashable. Easiest way to identify dupes.\nreturn hash(json.dumps(self.as_dict()))\n
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.__post_init__","title":"__post_init__() -> None","text":"Source code in brickflow/engine/compute.py
def __post_init__(self) -> None:\nself.validate()\n
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.as_dict","title":"as_dict(is_dlt_cluster: bool = False, allowed_fields: Optional[List[str]] = None, remove_fields: Optional[List[str]] = None) -> Dict[str, Any]","text":"Source code in brickflow/engine/compute.py
def as_dict(\nself,\nis_dlt_cluster: bool = False,\nallowed_fields: Optional[List[str]] = None,\nremove_fields: Optional[List[str]] = None,\n) -> Dict[str, Any]:\nd = dataclasses.asdict(self)\nd = {**d, **self.autoscale(is_dlt_cluster=is_dlt_cluster)}\n# if allowed fields are provided and check if value is in set\nself.cleanup(d, allowed_fields=allowed_fields, remove_fields=remove_fields)\nreturn d\n
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.autoscale","title":"autoscale(is_dlt_cluster: bool = False) -> Dict[str, Any]","text":"Source code in brickflow/engine/compute.py
def autoscale(self, is_dlt_cluster: bool = False) -> Dict[str, Any]:\nif self.min_workers is not None and self.max_workers is not None:\nresp: Dict[str, Dict[str, Optional[str | int]]] = {\n\"autoscale\": {\n\"min_workers\": self.min_workers,\n\"max_workers\": self.max_workers,\n}\n}\nif is_dlt_cluster is True:\nresp[\"autoscale\"][\"mode\"] = self.dlt_auto_scale_mode\nreturn resp\nreturn {}\n
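An illustrative use of the autoscale() helper above; the keyword fields come from this page, while the runtime, node type, and worker counts are made-up values:

from brickflow.engine.compute import Cluster

c = Cluster(
    name="demo-cluster",
    spark_version="13.3.x-scala2.12",  # made-up runtime
    node_type_id="m5.xlarge",          # made-up node type
    min_workers=2,
    max_workers=8,
)
print(c.autoscale())
# -> {'autoscale': {'min_workers': 2, 'max_workers': 8}}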
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.cleanup","title":"cleanup(d: Dict[str, Any], allowed_fields: Optional[List[str]] = None, remove_fields: Optional[List[str]] = None) -> None staticmethod","text":"Source code in brickflow/engine/compute.py
@staticmethod\ndef cleanup(\nd: Dict[str, Any],\nallowed_fields: Optional[List[str]] = None,\nremove_fields: Optional[List[str]] = None,\n) -> None:\nd.pop(\"min_workers\", None)\nd.pop(\"max_workers\", None)\nd.pop(\"dlt_auto_scale_mode\", None)\nd.pop(\"existing_cluster_id\", None)\nremove_fields = remove_fields or []\nfor k in list(d.keys()):\n# if allowed fields are provided and check if value is in set\nif allowed_fields and k not in allowed_fields:\nd.pop(k, None)\nif k in remove_fields:\nd.pop(k, None)\n
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.from_existing_cluster","title":"from_existing_cluster(existing_cluster_id: str) -> 'Cluster' classmethod","text":"Source code in brickflow/engine/compute.py
@classmethod\ndef from_existing_cluster(cls, existing_cluster_id: str) -> \"Cluster\":\n# just some stub value\nreturn Cluster(\nexisting_cluster_id,\nexisting_cluster_id,\nexisting_cluster_id,\nexisting_cluster_id=existing_cluster_id,\n)\n
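Usage is a one-liner for reusing an interactive cluster instead of creating a job cluster; the cluster id below is a placeholder:

from brickflow.engine.compute import Cluster

existing = Cluster.from_existing_cluster("0123-456789-abcde123")  # placeholder id
print(existing.existing_cluster_id)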
"},{"location":"api/compute/#brickflow.engine.compute.Cluster.validate","title":"validate() -> None","text":"Source code in brickflow/engine/compute.py
def validate(self) -> None:\nassert not (\nself.num_workers is not None\nand self.min_workers is not None\nand self.max_workers is not None\n), \"Num workers should not be provided with min and max workers\"\nassert not (\n(self.min_workers is None and self.max_workers is not None)\nor (self.min_workers is not None and self.max_workers is None)\n), \"Both min workers and max workers should be present if one is provided\"\n# noinspection PyTypeChecker\nassert not (\n(self.min_workers is not None and self.max_workers is not None)\nand (self.min_workers > self.max_workers)\n), \"Min workers should be less than or equal to max workers\"\n
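Because validate() runs from __post_init__, an invalid worker configuration fails at construction time. A sketch with made-up field values:

from brickflow.engine.compute import Cluster

try:
    Cluster(
        name="bad-cluster",
        spark_version="13.3.x-scala2.12",
        node_type_id="m5.xlarge",
        num_workers=4,   # fixed size...
        min_workers=2,   # ...combined with autoscale bounds: rejected
        max_workers=8,
    )
except AssertionError as err:
    print(err)  # Num workers should not be provided with min and max workers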
"},{"location":"api/compute/#brickflow.engine.compute.Runtimes","title":"brickflow.engine.compute.Runtimes","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes-attributes","title":"Attributes","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_AARCH64_PHOTON_SCALA2_12_LTS","title":"RUNTIME_10_4_X_AARCH64_PHOTON_SCALA2_12_LTS = '10.4.x-aarch64-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_AARCH64_SCALA2_12_LTS","title":"RUNTIME_10_4_X_AARCH64_SCALA2_12_LTS = '10.4.x-aarch64-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_CPU_ML_SCALA2_12_LTS","title":"RUNTIME_10_4_X_CPU_ML_SCALA2_12_LTS = '10.4.x-cpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_GPU_ML_SCALA2_12_LTS","title":"RUNTIME_10_4_X_GPU_ML_SCALA2_12_LTS = '10.4.x-gpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_PHOTON_SCALA2_12_LTS","title":"RUNTIME_10_4_X_PHOTON_SCALA2_12_LTS = '10.4.x-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_4_X_SCALA2_12_LTS","title":"RUNTIME_10_4_X_SCALA2_12_LTS = '10.4.x-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_AARCH64_PHOTON_SCALA2_12","title":"RUNTIME_10_5_X_AARCH64_PHOTON_SCALA2_12 = '10.5.x-aarch64-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_AARCH64_SCALA2_12","title":"RUNTIME_10_5_X_AARCH64_SCALA2_12 = '10.5.x-aarch64-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_CPU_ML_SCALA2_12","title":"RUNTIME_10_5_X_CPU_ML_SCALA2_12 = '10.5.x-cpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_GPU_ML_SCALA2_12","title":"RUNTIME_10_5_X_GPU_ML_SCALA2_12 = '10.5.x-gpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_PHOTON_SCALA2_12","title":"RUNTIME_10_5_X_PHOTON_SCALA2_12 = '10.5.x-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_10_5_X_SCALA2_12","title":"RUNTIME_10_5_X_SCALA2_12 = '10.5.x-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_AARCH64_PHOTON_SCALA2_12","title":"RUNTIME_11_0_X_AARCH64_PHOTON_SCALA2_12 = '11.0.x-aarch64-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_AARCH64_SCALA2_12","title":"RUNTIME_11_0_X_AARCH64_SCALA2_12 = '11.0.x-aarch64-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_CPU_ML_SCALA2_12","title":"RUNTIME_11_0_X_CPU_ML_SCALA2_12 = '11.0.x-cpu-ml-scala2.12' class-attribute 
instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_GPU_ML_SCALA2_12","title":"RUNTIME_11_0_X_GPU_ML_SCALA2_12 = '11.0.x-gpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_PHOTON_SCALA2_12","title":"RUNTIME_11_0_X_PHOTON_SCALA2_12 = '11.0.x-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_0_X_SCALA2_12","title":"RUNTIME_11_0_X_SCALA2_12 = '11.0.x-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_AARCH64_PHOTON_SCALA2_12","title":"RUNTIME_11_1_X_AARCH64_PHOTON_SCALA2_12 = '11.1.x-aarch64-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_AARCH64_SCALA2_12","title":"RUNTIME_11_1_X_AARCH64_SCALA2_12 = '11.1.x-aarch64-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_CPU_ML_SCALA2_12","title":"RUNTIME_11_1_X_CPU_ML_SCALA2_12 = '11.1.x-cpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_GPU_ML_SCALA2_12","title":"RUNTIME_11_1_X_GPU_ML_SCALA2_12 = '11.1.x-gpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_PHOTON_SCALA2_12","title":"RUNTIME_11_1_X_PHOTON_SCALA2_12 = '11.1.x-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_1_X_SCALA2_12","title":"RUNTIME_11_1_X_SCALA2_12 = '11.1.x-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_AARCH64_PHOTON_SCALA2_12","title":"RUNTIME_11_2_X_AARCH64_PHOTON_SCALA2_12 = '11.2.x-aarch64-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_AARCH64_SCALA2_12","title":"RUNTIME_11_2_X_AARCH64_SCALA2_12 = '11.2.x-aarch64-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_CPU_ML_SCALA2_12","title":"RUNTIME_11_2_X_CPU_ML_SCALA2_12 = '11.2.x-cpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_GPU_ML_SCALA2_12","title":"RUNTIME_11_2_X_GPU_ML_SCALA2_12 = '11.2.x-gpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_PHOTON_SCALA2_12","title":"RUNTIME_11_2_X_PHOTON_SCALA2_12 = '11.2.x-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_2_X_SCALA2_12","title":"RUNTIME_11_2_X_SCALA2_12 = '11.2.x-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_AARCH64_PHOTON_SCALA2_12","title":"RUNTIME_11_3_X_AARCH64_PHOTON_SCALA2_12 = '11.3.x-aarch64-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_AARCH64_SCALA2_12","title":"RUNTIME_11_3_X_AARCH64_SCALA2_12 = '11.3.x-aarch64-scala2.12' 
class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_CPU_ML_SCALA2_12","title":"RUNTIME_11_3_X_CPU_ML_SCALA2_12 = '11.3.x-cpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_GPU_ML_SCALA2_12","title":"RUNTIME_11_3_X_GPU_ML_SCALA2_12 = '11.3.x-gpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_PHOTON_SCALA2_12","title":"RUNTIME_11_3_X_PHOTON_SCALA2_12 = '11.3.x-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_11_3_X_SCALA2_12","title":"RUNTIME_11_3_X_SCALA2_12 = '11.3.x-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_7_3_X_CPU_ML_SCALA2_12_LTS","title":"RUNTIME_7_3_X_CPU_ML_SCALA2_12_LTS = '7.3.x-cpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_7_3_X_GPU_ML_SCALA2_12_LTS","title":"RUNTIME_7_3_X_GPU_ML_SCALA2_12_LTS = '7.3.x-gpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_7_3_X_HLS_SCALA2_12_LTS","title":"RUNTIME_7_3_X_HLS_SCALA2_12_LTS = '7.3.x-hls-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_7_3_X_SCALA2_12_LTS","title":"RUNTIME_7_3_X_SCALA2_12_LTS = '7.3.x-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_9_1_X_AARCH64_SCALA2_12_LTS","title":"RUNTIME_9_1_X_AARCH64_SCALA2_12_LTS = '9.1.x-aarch64-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_9_1_X_CPU_ML_SCALA2_12_LTS","title":"RUNTIME_9_1_X_CPU_ML_SCALA2_12_LTS = '9.1.x-cpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_9_1_X_GPU_ML_SCALA2_12_LTS","title":"RUNTIME_9_1_X_GPU_ML_SCALA2_12_LTS = '9.1.x-gpu-ml-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_9_1_X_PHOTON_SCALA2_12_LTS","title":"RUNTIME_9_1_X_PHOTON_SCALA2_12_LTS = '9.1.x-photon-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/compute/#brickflow.engine.compute.Runtimes.RUNTIME_9_1_X_SCALA2_12_LTS","title":"RUNTIME_9_1_X_SCALA2_12_LTS = '9.1.x-scala2.12' class-attribute instance-attribute","text":""},{"location":"api/context/","title":"Context","text":""},{"location":"api/context/#brickflow.context.context-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.BRANCH_SKIP_EXCEPT","title":"brickflow.context.context.BRANCH_SKIP_EXCEPT = 'branch_skip_except' module-attribute","text":""},{"location":"api/context/#brickflow.context.context.RETURN_VALUE_KEY","title":"brickflow.context.context.RETURN_VALUE_KEY = 'return_value' module-attribute","text":""},{"location":"api/context/#brickflow.context.context.SKIP_EXCEPT_HACK","title":"brickflow.context.context.SKIP_EXCEPT_HACK = 'brickflow_hack_skip_all' module-attribute","text":""},{"location":"api/context/#brickflow.context.context.T","title":"brickflow.context.context.T = TypeVar('T') 
module-attribute","text":""},{"location":"api/context/#brickflow.context.context.ctx","title":"brickflow.context.context.ctx = Context() module-attribute","text":""},{"location":"api/context/#brickflow.context.context-classes","title":"Classes","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables","title":"brickflow.context.context.BrickflowBuiltInTaskVariables","text":"

Bases: Enum

"},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.job_id","title":"job_id = 'brickflow_job_id' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.parent_run_id","title":"parent_run_id = 'brickflow_parent_run_id' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.run_id","title":"run_id = 'brickflow_run_id' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.start_date","title":"start_date = 'brickflow_start_date' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.start_time","title":"start_time = 'brickflow_start_time' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.task_key","title":"task_key = 'brickflow_task_key' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowBuiltInTaskVariables.task_retry_count","title":"task_retry_count = 'brickflow_task_retry_count' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables","title":"brickflow.context.context.BrickflowInternalVariables","text":"

Bases: Enum

"},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables.env","title":"env = BrickflowEnvVars.BRICKFLOW_ENV.value.lower() class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables.only_run_tasks","title":"only_run_tasks = 'brickflow_internal_only_run_tasks' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables.task_id","title":"task_id = 'brickflow_internal_task_name' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables.workflow_id","title":"workflow_id = 'brickflow_internal_workflow_name' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables.workflow_prefix","title":"workflow_prefix = 'brickflow_internal_workflow_prefix' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowInternalVariables.workflow_suffix","title":"workflow_suffix = 'brickflow_internal_workflow_suffix' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs","title":"brickflow.context.context.BrickflowTaskComs dataclass","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs.dbutils","title":"dbutils: Optional[Any] = None class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs.storage","title":"storage: Dict[str, Any] = field(init=False, default_factory=lambda : {}) class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs-functions","title":"Functions","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs.get","title":"get(task_id: str, key: Optional[str] = None) -> Any","text":"Source code in brickflow/context/context.py
def get(self, task_id: str, key: Optional[str] = None) -> Any:\nif key is None:\nreturn BrickflowTaskComsDict(task_id=task_id, task_coms=self)\nif self.dbutils is not None:\nencoded_value = self.dbutils.jobs.taskValues.get(\nkey=key, taskKey=task_id, debugValue=\"debug\"\n)\nreturn BrickflowTaskComsObject.from_encoded_value(encoded_value).value\nelse:\n# TODO: logging using local task coms\nencoded_value = self.storage[self._key(task_id, key)]\nreturn BrickflowTaskComsObject.from_encoded_value(encoded_value).value\n
"},{"location":"api/context/#brickflow.context.context.BrickflowTaskComs.put","title":"put(task_id: str, key: str, value: Any) -> None","text":"Source code in brickflow/context/context.py
def put(self, task_id: str, key: str, value: Any) -> None:\nencoded_value = BrickflowTaskComsObject(value).to_encoded_value\nif self.dbutils is not None:\nself.dbutils.jobs.taskValues.set(key, encoded_value)\nelse:\n# TODO: logging using local task coms\nself.storage[self._key(task_id, key)] = encoded_value\n
"},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsDict","title":"brickflow.context.context.BrickflowTaskComsDict dataclass","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsDict-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsDict.task_coms","title":"task_coms: BrickflowTaskComs instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsDict.task_id","title":"task_id: str instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject","title":"brickflow.context.context.BrickflowTaskComsObject dataclass","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject.to_encoded_value","title":"to_encoded_value: str property","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject.value","title":"value: Any property","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject-functions","title":"Functions","text":""},{"location":"api/context/#brickflow.context.context.BrickflowTaskComsObject.from_encoded_value","title":"from_encoded_value(encoded_value: Union[str, bytes]) -> BrickflowTaskComsObject classmethod","text":"Source code in brickflow/context/context.py
@classmethod\ndef from_encoded_value(\ncls, encoded_value: Union[str, bytes]\n) -> \"BrickflowTaskComsObject\":\ntry:\n_encoded_value = (\nencoded_value\nif isinstance(encoded_value, bytes)\nelse encoded_value.encode(\"utf-8\")\n)\nb64_bytes = base64.b64decode(_encoded_value)\nreturn cls(pickle.loads(b64_bytes).value)\nexcept binascii.Error:\n_decoded_value = (\nencoded_value.decode(\"utf-8\")\nif isinstance(encoded_value, bytes)\nelse encoded_value\n)\nreturn cls(_decoded_value)\n
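A simplified round trip of the encoding used here; the real class pickles a small wrapper object with a .value attribute, but the base64-over-pickle idea is the same, and the binascii.Error fallback above is what lets plain-string task values decode too:

import base64
import pickle

payload = {"rows_written": 1234}
encoded = base64.b64encode(pickle.dumps(payload)).decode("utf-8")  # to_encoded_value analogue
decoded = pickle.loads(base64.b64decode(encoded))                  # from_encoded_value analogue
assert decoded == payload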
"},{"location":"api/context/#brickflow.context.context.Context","title":"brickflow.context.context.Context() -> None","text":"Source code in brickflow/context/context.py
def __init__(self) -> None:\n# Order of init matters; TODO: fix this\nself._dbutils: Optional[Any] = None\nself._spark: Optional[Any] = None\nself._task_coms: BrickflowTaskComs\nself._current_task: Optional[str] = None\nself._configure()\nself._current_project: Optional[str] = None\n
"},{"location":"api/context/#brickflow.context.context.Context-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.Context.current_project","title":"current_project: Optional[str] property","text":""},{"location":"api/context/#brickflow.context.context.Context.current_task","title":"current_task: Optional[str] property","text":""},{"location":"api/context/#brickflow.context.context.Context.dbutils","title":"dbutils: DBUtils property","text":""},{"location":"api/context/#brickflow.context.context.Context.env","title":"env: str property","text":""},{"location":"api/context/#brickflow.context.context.Context.log","title":"log: logging.Logger property","text":""},{"location":"api/context/#brickflow.context.context.Context.spark","title":"spark: SparkSession property","text":""},{"location":"api/context/#brickflow.context.context.Context.task_coms","title":"task_coms: BrickflowTaskComs property","text":""},{"location":"api/context/#brickflow.context.context.Context-functions","title":"Functions","text":""},{"location":"api/context/#brickflow.context.context.Context.dbutils_widget_get_or_else","title":"dbutils_widget_get_or_else(key: str, debug: Optional[str]) -> Optional[str]","text":"Source code in brickflow/context/context.py
@deprecated\ndef dbutils_widget_get_or_else(\nself, key: str, debug: Optional[str]\n) -> Optional[str]:\ntry:\nreturn self.dbutils.widgets.get(key)\nexcept Exception:\n# todo: log error\nreturn debug\n
"},{"location":"api/context/#brickflow.context.context.Context.get_by_env","title":"get_by_env(purpose: str, *, default: Optional[T] = None, local: Optional[T] = None, dev: Optional[T] = None, non_prod: Optional[T] = None, test: Optional[T] = None, qa: Optional[T] = None, prod: Optional[T] = None, uat: Optional[T] = None, **kwargs: Optional[T]) -> Optional[T]","text":"Source code in brickflow/context/context.py
def get_by_env(\nself,\npurpose: str,\n*,\ndefault: Optional[T] = None,\nlocal: Optional[T] = None,\ndev: Optional[T] = None,\nnon_prod: Optional[T] = None,\ntest: Optional[T] = None,\nqa: Optional[T] = None,\nprod: Optional[T] = None,\nuat: Optional[T] = None,\n**kwargs: Optional[T],\n) -> Optional[T]:\n# deep copy without modifying kwargs\ndef add_if_not_none(\n_d: Dict[str, Optional[T]], _k: str, _v: Optional[T]\n) -> None:\nif _v is None:\nreturn\n_d[_k] = _v\n_dict = copy.deepcopy(kwargs)\nadd_if_not_none(_dict, \"local\", local)\nadd_if_not_none(_dict, \"non_prod\", non_prod)\nadd_if_not_none(_dict, \"dev\", dev)\nadd_if_not_none(_dict, \"test\", test)\nadd_if_not_none(_dict, \"qa\", qa)\nadd_if_not_none(_dict, \"prod\", prod)\nadd_if_not_none(_dict, \"uat\", uat)\n_env = self.env\n_ilog.info(\"Configuring: %s; Using env: '%s' to fetch value...\", purpose, _env)\nif _env not in _dict and default is None:\nraise KeyError(\nf\"Configuring: {purpose}; Unable to find environment key: {_env}, \"\nf\"only found env definitions: {list(_dict.keys())}\"\n)\nif _env not in _dict and default is not None:\n_ilog.info(\n\"Configuring: %s; Found no value configured with env: '%s' using default value...\",\npurpose,\n_env,\n)\nres = _dict.get(_env, default)\nreturn res\n
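Typical get_by_env usage, picking a per-environment value with a default fallback; the import path (assumed re-export of the ctx singleton) and the table names are illustrative:

from brickflow.context import ctx

table = ctx.get_by_env(
    "source table",                   # label used only in log messages
    default="catalog.bronze.events",  # fallback when the env has no entry
    dev="catalog_dev.bronze.events",
    prod="catalog_prod.bronze.events",
)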
"},{"location":"api/context/#brickflow.context.context.Context.get_parameter","title":"get_parameter(key: str, debug: Optional[str] = None) -> Optional[str]","text":"Source code in brickflow/context/context.py
def get_parameter(self, key: str, debug: Optional[str] = None) -> Optional[str]:\ntry:\nreturn self.dbutils.widgets.get(key)\nexcept Exception:\n# todo: log error\n_ilog.debug(\"Unable to get parameter: %s from dbutils\", key)\nreturn debug\n
"},{"location":"api/context/#brickflow.context.context.Context.get_return_value","title":"get_return_value(task_key: Union[str, Callable]) -> Any","text":"Source code in brickflow/context/context.py
def get_return_value(self, task_key: Union[str, Callable]) -> Any:\ntask_key = task_key.__name__ if callable(task_key) else task_key\nreturn self.task_coms.get(task_key, RETURN_VALUE_KEY)\n
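A sketch of passing a small result between tasks: brickflow persists the upstream task's return value under RETURN_VALUE_KEY, and the downstream task reads it back by task key. The task names are made up, and the functions are assumed to run as brickflow tasks:

from brickflow.context import ctx

def emit_path():
    # brickflow's task wrapper stores this return value via task coms
    return "s3://bucket/landing/2024-01-01"

def consume_path():
    path = ctx.get_return_value(emit_path)  # accepts the callable or "emit_path"
    print(path)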
"},{"location":"api/context/#brickflow.context.context.Context.is_local","title":"is_local() -> bool","text":"Source code in brickflow/context/context.py
def is_local(self) -> bool:\nreturn self.env == BrickflowDefaultEnvs.LOCAL.value\n
"},{"location":"api/context/#brickflow.context.context.Context.job_id","title":"job_id(*, debug: Optional[str] = None) -> Any","text":"

This function fetches the job_id value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.job_id)\ndef job_id(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n    This function fetches the job_id value using the bind_variable decorator.\n    The implementation is intentionally empty because the decorator handles the logic.\n    \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.parent_run_id","title":"parent_run_id(*, debug: Optional[str] = None) -> Any","text":"

This function fetches the parent_run_id value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.parent_run_id)\ndef parent_run_id(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n    This function fetches the parent_run_id value using the bind_variable decorator.\n    The implementation is intentionally empty because the decorator handles the logic.\n    \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.run_id","title":"run_id(*, debug: Optional[str] = None) -> Any","text":"

This function fetches the run_id value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.run_id)\ndef run_id(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n    This function fetches the run_id value using the bind_variable decorator.\n    The implementation is intentionally empty because the decorator handles the logic.\n    \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.set_current_project","title":"set_current_project(project: str) -> None","text":"Source code in brickflow/context/context.py
def set_current_project(self, project: str) -> None:\nself._current_project = project\n
"},{"location":"api/context/#brickflow.context.context.Context.skip_all_except","title":"skip_all_except(branch_task: Union[Callable, str]) -> None","text":"Source code in brickflow/context/context.py
def skip_all_except(self, branch_task: Union[Callable, str]) -> None:\nif self._current_task is None:\nraise RuntimeError(\"Current task is empty unable to skip...\")\nbranch_task_key = (\nbranch_task.__name__\nif callable(branch_task) and hasattr(branch_task, \"__name__\") is True\nelse branch_task\n)\nself._task_coms.put(self._current_task, BRANCH_SKIP_EXCEPT, branch_task_key)\n
"},{"location":"api/context/#brickflow.context.context.Context.skip_all_following","title":"skip_all_following() -> None","text":"Source code in brickflow/context/context.py
def skip_all_following(self) -> None:\nif self._current_task is None:\nraise RuntimeError(\"Current task is empty unable to skip...\")\nself._task_coms.put(self._current_task, BRANCH_SKIP_EXCEPT, SKIP_EXCEPT_HACK)\n
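Put together, skip_all_except and skip_all_following give a branch task two outcomes: keep exactly one downstream task, or skip everything. A sketch in which the task names and the predicate are made up:

from brickflow.context import ctx

def branch_task():
    full_load = True  # stand-in for a real predicate
    if full_load:
        ctx.skip_all_except("load_full")  # only load_full runs downstream
    else:
        ctx.skip_all_following()          # skip every downstream task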
"},{"location":"api/context/#brickflow.context.context.Context.start_date","title":"start_date(*, debug: Optional[str] = None) -> Any","text":"

This function fetches the start_date value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.start_date)\ndef start_date(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n    This function fetches the start_date value using the bind_variable decorator.\n    The implementation is intentionally empty because the decorator handles the logic.\n    \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.start_time","title":"start_time(*, debug: Optional[str] = None) -> Any","text":"

This function fetches the start_time value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.start_time)\ndef start_time(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n    This function fetches the start_time value using the bind_variable decorator.\n    The implementation is intentionally empty because the decorator handles the logic.\n    \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.task_key","title":"task_key(*, debug: Optional[str] = None) -> Any","text":"

This function fetches the task_key value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.task_key)\ndef task_key(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n    This function fetches the task_key value using the bind_variable decorator.\n    The implementation is intentionally empty because the decorator handles the logic.\n    \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.Context.task_retry_count","title":"task_retry_count(*, debug: Optional[str] = None) -> Any","text":"

This function fetches the task_retry_count value using the bind_variable decorator. The implementation is intentionally empty because the decorator handles the logic.

Source code in brickflow/context/context.py
@bind_variable(BrickflowBuiltInTaskVariables.task_retry_count)\ndef task_retry_count(self, *, debug: Optional[str] = None) -> Any:\n\"\"\"\n    This function fetches the task_retry_count value using the bind_variable decorator.\n    The implementation is intentionally empty because the decorator handles the logic.\n    \"\"\"\npass\n
"},{"location":"api/context/#brickflow.context.context.ContextMode","title":"brickflow.context.context.ContextMode","text":"

Bases: Enum

"},{"location":"api/context/#brickflow.context.context.ContextMode-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.ContextMode.databricks","title":"databricks = 'databricks' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.ContextMode.not_databricks","title":"not_databricks = 'not_databricks' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context.TaskComsObjectResult","title":"brickflow.context.context.TaskComsObjectResult","text":"

Bases: Enum

"},{"location":"api/context/#brickflow.context.context.TaskComsObjectResult-attributes","title":"Attributes","text":""},{"location":"api/context/#brickflow.context.context.TaskComsObjectResult.NO_RESULTS","title":"NO_RESULTS = 'NO_RESULTS' class-attribute instance-attribute","text":""},{"location":"api/context/#brickflow.context.context-functions","title":"Functions","text":""},{"location":"api/context/#brickflow.context.context.bind_variable","title":"brickflow.context.context.bind_variable(builtin: BrickflowBuiltInTaskVariables) -> Callable","text":"Source code in brickflow/context/context.py
def bind_variable(builtin: BrickflowBuiltInTaskVariables) -> Callable:\ndef wrapper(f: Callable) -> Callable:\n@functools.wraps(f)\ndef func(*args, **kwargs):  # type: ignore\n_self: Context = args[0]  # type: ignore\ndebug = kwargs[\"debug\"]\nf(*args, **kwargs)  # no-op\nif _self.dbutils is not None:\nreturn _self.get_parameter(builtin.value, debug)\nreturn debug\nreturn func\nreturn wrapper\n
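A stripped-down sketch of what bind_variable does: the decorated method body is a deliberate no-op, and the wrapper resolves the built-in task variable from job parameters when dbutils is available, otherwise returning the debug default (names simplified relative to the source above):

import functools

def bind_param(name: str):
    def wrapper(f):
        @functools.wraps(f)
        def func(self, *, debug=None):
            f(self, debug=debug)  # the decorated body is a no-op
            if getattr(self, "dbutils", None) is not None:
                return self.get_parameter(name, debug)  # resolve from job params
            return debug  # local/debug fallback
        return func
    return wrapper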
"},{"location":"api/misc/","title":"Misc","text":""},{"location":"api/project/","title":"Project","text":""},{"location":"api/project/#brickflow.engine.project-classes","title":"Classes","text":""},{"location":"api/project/#brickflow.engine.project.Project","title":"brickflow.engine.project.Project dataclass","text":""},{"location":"api/project/#brickflow.engine.project.Project-attributes","title":"Attributes","text":""},{"location":"api/project/#brickflow.engine.project.Project.batch","title":"batch: bool = True class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.bundle_base_path","title":"bundle_base_path: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.bundle_obj_name","title":"bundle_obj_name: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.codegen_kwargs","title":"codegen_kwargs: Optional[Dict[str, Any]] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.codegen_mechanism","title":"codegen_mechanism: Optional[Type[CodegenInterface]] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.debug_execute_task","title":"debug_execute_task: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.debug_execute_workflow","title":"debug_execute_workflow: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.entry_point_path","title":"entry_point_path: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.git_reference","title":"git_reference: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.git_repo","title":"git_repo: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.libraries","title":"libraries: Optional[List[TaskLibrary]] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.mode","title":"mode: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.name","title":"name: str instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.provider","title":"provider: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project.s3_backend","title":"s3_backend: Optional[Dict[str, str]] = None class-attribute instance-attribute","text":""},{"location":"api/project/#brickflow.engine.project.Project-functions","title":"Functions","text":""},{"location":"api/project/#brickflow.engine.project.Project.__enter__","title":"__enter__() -> _Project","text":"Source code in brickflow/engine/project.py
def __enter__(self) -> \"_Project\":\nself._project = _Project(\nself.name,\nself.git_repo,\nself.provider,\nself.git_reference,\nself.s3_backend,\nself.entry_point_path,\nlibraries=self.libraries or [],\nbatch=self.batch,\nbundle_obj_name=self.bundle_obj_name,\nbundle_base_path=self.bundle_base_path,\n)\nreturn self._project\n
"},{"location":"api/project/#brickflow.engine.project.Project.__exit__","title":"__exit__(exc_type, exc_val, exc_tb) -> None","text":"Source code in brickflow/engine/project.py
def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # type: ignore\nif exc_type is not None:\nerror_types = {Stage.deploy: DeployError, Stage.execute: ExecuteError}\nraise error_types[self._mode](\nf\"Oops... failed during: {self._mode}\"\n) from exc_val\nif len(self._project.workflows) == 0:\n_ilog.info(\"Doing nothing, no workflows...\")\nreturn\nif self._mode.value == Stage.deploy.value:\n_ilog.info(\"Deploying changes... to %s\", ctx.env)\nif self.codegen_mechanism is None:\nraise ValueError(\n\"codegen_mechanism cannot be None; please raise a github issue for this.\"\n)\ncodegen = self.codegen_mechanism(\nproject=self._project,\nid_=f\"{ctx.env}_{self.name}\",\nenv=ctx.env,\n**(self.codegen_kwargs or {}),\n)\ncodegen.synth()\nif self._mode.value == Stage.execute.value:\nwf_id = ctx.get_parameter(\nBrickflowInternalVariables.workflow_id.value,\nself.debug_execute_workflow,\n)\nt_id = ctx.get_parameter(\nBrickflowInternalVariables.task_id.value, self.debug_execute_task\n)\nif wf_id is None or t_id is None:\n_ilog.info(\n\"No workflow id or task key was found; doing nothing...\"\n)\nreturn\nworkflow = self._project.get_workflow(wf_id)\ntask = workflow.get_task(t_id)\ntask.execute()\n
"},{"location":"api/project/#brickflow.engine.project.Project.__post_init__","title":"__post_init__() -> None","text":"Source code in brickflow/engine/project.py
def __post_init__(self) -> None:\nself._mode = Stage[\nconfig(BrickflowEnvVars.BRICKFLOW_MODE.value, default=Stage.execute.value)\n]\nself.entry_point_path = self.entry_point_path or get_caller_info()\n# setup current_project\nenv_project_name = config(\nBrickflowEnvVars.BRICKFLOW_PROJECT_NAME.value, default=None\n)\nif (\nenv_project_name is not None\nand self.name is not None\nand env_project_name != self.name\n):\nraise ValueError(\n\"Project name in config files and entrypoint must be the same\"\n)\nctx.set_current_project(self.name or env_project_name)  # always setup first\n# populate bundle info via env vars\nself.bundle_obj_name = config(\nBrickflowEnvVars.BRICKFLOW_BUNDLE_OBJ_NAME.value,\ndefault=\".brickflow_bundles\",\n)\nself.bundle_base_path = config(\nBrickflowEnvVars.BRICKFLOW_BUNDLE_BASE_PATH.value,\ndefault=\"/Users/${workspace.current_user.userName}\",\n)\nself.git_reference = config(\nBrickflowEnvVars.BRICKFLOW_GIT_REF.value, default=self.get_git_ref()\n)\nif (\nself._mode == Stage.deploy\nand ctx.is_local() is False\nand self.git_reference is None\n):\nraise ValueError(\n\"git_reference must be set when deploying to non-local envs\"\n)\nself.provider = config(\nBrickflowEnvVars.BRICKFLOW_GIT_PROVIDER.value, default=self.provider\n)\nself.git_repo = config(\nBrickflowEnvVars.BRICKFLOW_GIT_REPO.value, default=self.git_repo\n)\nif self.s3_backend is None:\nself.s3_backend = {\n\"bucket\": config(\"BRICKFLOW_S3_BACKEND_BUCKET\", default=None),\n\"key\": config(\"BRICKFLOW_S3_BACKEND_KEY\", default=None),\n\"region\": config(\"BRICKFLOW_S3_BACKEND_REGION\", default=None),\n\"dynamodb_table\": config(\n\"BRICKFLOW_S3_BACKEND_DYNAMODB_TABLE\", default=None\n),\n}\nif all(value is None for value in self.s3_backend.values()):\nself.s3_backend = None\ndeployment_mode = config(\nBrickflowEnvVars.BRICKFLOW_DEPLOYMENT_MODE.value, default=\"cdktf\"\n)\nif deployment_mode == BrickflowDeployMode.CDKTF.value:\nself.codegen_mechanism = HashicorpCDKTFGen\nelif deployment_mode == BrickflowDeployMode.BUNDLE.value:\nself.codegen_mechanism = DatabricksBundleCodegen\nif self.codegen_kwargs is None:\nself.codegen_kwargs = {}\n
"},{"location":"api/project/#brickflow.engine.project.Project.get_git_ref","title":"get_git_ref() -> Optional[str]","text":"Source code in brickflow/engine/project.py
def get_git_ref(self) -> Optional[str]:\nif self._mode == Stage.deploy:\nif self.git_reference is not None:\nreturn self.git_reference\nelse:\ntry:\nreturn f\"commit/{get_current_commit()}\"\nexcept Exception:\n_ilog.warning(\n\"Unable to get current commit; defaulting to empty string\"\n)\nreturn \"commit/fake-local-stub\" if ctx.is_local() else None\nelse:\nreturn self.git_reference if self.git_reference is not None else \"\"\n
"},{"location":"api/secrets/","title":"Secrets","text":""},{"location":"api/secrets/#brickflow_plugins.secrets-attributes","title":"Attributes","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.BRICKFLOW_SECRETS_BACKEND","title":"brickflow_plugins.secrets.BRICKFLOW_SECRETS_BACKEND = 'brickflow_secrets_backend' module-attribute","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.brickflow_secrets_backend_plugin_impl","title":"brickflow_plugins.secrets.brickflow_secrets_backend_plugin_impl = pluggy.HookimplMarker(BRICKFLOW_SECRETS_BACKEND) module-attribute","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.brickflow_secrets_plugin_spec","title":"brickflow_plugins.secrets.brickflow_secrets_plugin_spec = pluggy.HookspecMarker(BRICKFLOW_SECRETS_BACKEND) module-attribute","text":""},{"location":"api/secrets/#brickflow_plugins.secrets-classes","title":"Classes","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.AbstractSecretsHelper","title":"brickflow_plugins.secrets.AbstractSecretsHelper","text":"

Bases: abc.ABC

"},{"location":"api/secrets/#brickflow_plugins.secrets.AbstractSecretsHelper-attributes","title":"Attributes","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.AbstractSecretsHelper.PROTOCOL_STARTS_WITH","title":"PROTOCOL_STARTS_WITH: Optional[Union[str, List[str]]] = None class-attribute instance-attribute","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.AbstractSecretsHelper-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.AbstractSecretsHelper.get_secret_value_from_url","title":"get_secret_value_from_url(url_parsed_result: ParseResult)","text":"Source code in brickflow_plugins/secrets/__init__.py
def get_secret_value_from_url(self, url_parsed_result: ParseResult):\nallowed_protocols = (\n[self.PROTOCOL_STARTS_WITH]\nif isinstance(self.PROTOCOL_STARTS_WITH, str)\nelse self.PROTOCOL_STARTS_WITH\n)\nif self.PROTOCOL_STARTS_WITH is not None and not any(\n[\nurl_parsed_result.scheme.lower().startswith(protocol)\nfor protocol in allowed_protocols\n]\n):\nreturn None\nreturn self._get_secret_value_from_url(url_parsed_result)\n
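Because `get_secret_value_from_url` filters on `PROTOCOL_STARTS_WITH` before delegating to `_get_secret_value_from_url`, a custom helper only needs those two pieces. A hedged sketch that resolves hypothetical `env://VAR_NAME` URLs from the process environment:

```python
import os
from typing import Optional
from urllib.parse import ParseResult

class EnvVarSecretsHelper(AbstractSecretsHelper):
    # only URLs whose scheme starts with "env" reach the private hook
    PROTOCOL_STARTS_WITH = "env"

    def _get_secret_value_from_url(self, url_parsed_result: ParseResult) -> Optional[str]:
        # env://MY_SECRET -> os.environ.get("MY_SECRET"); the netloc carries the name
        return os.environ.get(url_parsed_result.netloc)
```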
"},{"location":"api/secrets/#brickflow_plugins.secrets.B64SecretsHelper","title":"brickflow_plugins.secrets.B64SecretsHelper","text":"

Bases: AbstractSecretsHelper

"},{"location":"api/secrets/#brickflow_plugins.secrets.B64SecretsHelper-attributes","title":"Attributes","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.B64SecretsHelper.PROTOCOL_STARTS_WITH","title":"PROTOCOL_STARTS_WITH = ['base64', 'b64'] class-attribute instance-attribute","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.B64SecretsHelper-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.Base64BrickflowSecretPluginImpl","title":"brickflow_plugins.secrets.Base64BrickflowSecretPluginImpl","text":"

Bases: BrickflowSecretPluginSpec

"},{"location":"api/secrets/#brickflow_plugins.secrets.Base64BrickflowSecretPluginImpl-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.Base64BrickflowSecretPluginImpl.get_secret_value","title":"get_secret_value(url_parsed_result: ParseResult) -> Optional['str'] staticmethod","text":"Source code in brickflow_plugins/secrets/__init__.py
@staticmethod\n@brickflow_secrets_backend_plugin_impl\ndef get_secret_value(url_parsed_result: ParseResult) -> Optional[\"str\"]:\nreturn B64SecretsHelper().get_secret_value_from_url(url_parsed_result)\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretPluginSpec","title":"brickflow_plugins.secrets.BrickflowSecretPluginSpec","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretPluginSpec-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretPluginSpec.get_secret_value","title":"get_secret_value(url_parsed_result: ParseResult) -> Optional['str'] staticmethod","text":"

Custom secret-resolution hook that can be plugged in.

Source code in brickflow_plugins/secrets/__init__.py
@staticmethod\n@brickflow_secrets_plugin_spec(firstresult=True)\ndef get_secret_value(url_parsed_result: ParseResult) -> Optional[\"str\"]:\n\"\"\"Custom secret-resolution hook that can be plugged in.\"\"\"\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretsBackend","title":"brickflow_plugins.secrets.BrickflowSecretsBackend","text":"

Bases: BaseSecretsBackend

"},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretsBackend-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretsBackend.get_conn_value","title":"get_conn_value(conn_id: str) -> str | None","text":"Source code in brickflow_plugins/secrets/__init__.py
def get_conn_value(self, conn_id: str) -> str | None:\nparsed_url = urlparse(conn_id)\nreturn get_brickflow_tasks_hook().get_secret_value(url_parsed_result=parsed_url)\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretsBackend.set_backend_env","title":"set_backend_env()","text":"Source code in brickflow_plugins/secrets/__init__.py
def set_backend_env(self):\nfor k, v in self._get_secrets_backend_env().items():\nos.environ[k] = v\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.BrickflowSecretsBackend.unset_backend_env","title":"unset_backend_env()","text":"Source code in brickflow_plugins/secrets/__init__.py
def unset_backend_env(self):\nfor k in self._get_secrets_backend_env().keys():\nos.environ.pop(k, None)\n
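Together, `set_backend_env` and `unset_backend_env` give a scoped way to install the backend around a block of work. A small usage sketch (the connection URL is a placeholder; decoding is delegated to whichever plugin claims the scheme):

```python
backend = BrickflowSecretsBackend()
backend.set_backend_env()  # export the backend's env vars for downstream libraries
try:
    secret = backend.get_conn_value("base64://aGVsbG8td29ybGQ=")
finally:
    backend.unset_backend_env()  # always restore the environment
```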
"},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusBrickflowSecretPluginImpl","title":"brickflow_plugins.secrets.CerberusBrickflowSecretPluginImpl","text":"

Bases: BrickflowSecretPluginSpec

"},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusBrickflowSecretPluginImpl-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusBrickflowSecretPluginImpl.get_secret_value","title":"get_secret_value(url_parsed_result: ParseResult) -> Optional['str'] staticmethod","text":"Source code in brickflow_plugins/secrets/__init__.py
@staticmethod\n@brickflow_secrets_backend_plugin_impl\ndef get_secret_value(url_parsed_result: ParseResult) -> Optional[\"str\"]:\nreturn CerberusSecretsHelper().get_secret_value_from_url(url_parsed_result)\n
"},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusSecretsHelper","title":"brickflow_plugins.secrets.CerberusSecretsHelper","text":"

Bases: AbstractSecretsHelper

"},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusSecretsHelper-attributes","title":"Attributes","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusSecretsHelper.PROTOCOL_STARTS_WITH","title":"PROTOCOL_STARTS_WITH = 'cerberus' class-attribute instance-attribute","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusSecretsHelper-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.CerberusSecretsHelper.parse_path_and_key","title":"parse_path_and_key(path: Optional[str]) -> Optional[Tuple[str, str]] staticmethod","text":"Source code in brickflow_plugins/secrets/__init__.py
@staticmethod\ndef parse_path_and_key(path: Optional[str]) -> Optional[Tuple[str, str]]:\nif path is not None:\n_cleaned_path = path.lstrip(\"/\").rstrip(\"/\")\nreturn \"/\".join(_cleaned_path.split(\"/\")[:-1]), _cleaned_path.split(\"/\")[-1]\nreturn None\n
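A quick worked example of the split: leading and trailing slashes are stripped, everything before the last slash becomes the path, and the final segment becomes the key.

```python
path, key = CerberusSecretsHelper.parse_path_and_key("/app/my-sdb/db-password/")
assert path == "app/my-sdb"
assert key == "db-password"
```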
"},{"location":"api/secrets/#brickflow_plugins.secrets.DatabricksSecretsBrickflowSecretPluginImpl","title":"brickflow_plugins.secrets.DatabricksSecretsBrickflowSecretPluginImpl","text":"

Bases: BrickflowSecretPluginSpec

"},{"location":"api/secrets/#brickflow_plugins.secrets.DatabricksSecretsBrickflowSecretPluginImpl-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.DatabricksSecretsBrickflowSecretPluginImpl.get_secret_value","title":"get_secret_value(url_parsed_result: ParseResult) -> Optional['str'] staticmethod","text":"Source code in brickflow_plugins/secrets/__init__.py
@staticmethod\n@brickflow_secrets_backend_plugin_impl\ndef get_secret_value(url_parsed_result: ParseResult) -> Optional[\"str\"]:\n# not implemented yet\nreturn None\n
"},{"location":"api/secrets/#brickflow_plugins.secrets-functions","title":"Functions","text":""},{"location":"api/secrets/#brickflow_plugins.secrets.get_brickflow_tasks_hook","title":"brickflow_plugins.secrets.get_brickflow_tasks_hook() -> BrickflowSecretPluginSpec cached","text":"Source code in brickflow_plugins/secrets/__init__.py
@functools.lru_cache\ndef get_brickflow_tasks_hook() -> BrickflowSecretPluginSpec:\npm = pluggy.PluginManager(BRICKFLOW_SECRETS_BACKEND)\npm.add_hookspecs(BrickflowSecretPluginSpec)\npm.load_setuptools_entrypoints(BRICKFLOW_SECRETS_BACKEND)\npm.register(CerberusBrickflowSecretPluginImpl())\npm.register(Base64BrickflowSecretPluginImpl())\nfor name, plugin_instance in pm.list_name_plugin():\nlog.info(\n\"Loaded plugin with name: %s and class: %s\",\nname,\nplugin_instance.__class__.__name__,\n)\nreturn pm.hook\n
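Since the hookspec is declared with `firstresult=True`, the first registered plugin that recognizes a URL scheme wins. A sketch of resolving a base64-encoded secret through the cached hook (the encoded value is a placeholder):

```python
from urllib.parse import urlparse

hook = get_brickflow_tasks_hook()  # cached pluggy hook relay
value = hook.get_secret_value(url_parsed_result=urlparse("base64://aGVsbG8td29ybGQ="))
```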
"},{"location":"api/task/","title":"Task","text":""},{"location":"api/task/#brickflow.engine.task-classes","title":"Classes","text":""},{"location":"api/task/#brickflow.engine.task.Task","title":"brickflow.engine.task.Task dataclass","text":""},{"location":"api/task/#brickflow.engine.task.Task-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.Task.brickflow_default_params","title":"brickflow_default_params: Dict[str, str] property","text":""},{"location":"api/task/#brickflow.engine.task.Task.builtin_notebook_params","title":"builtin_notebook_params: Dict[str, str] property","text":""},{"location":"api/task/#brickflow.engine.task.Task.cluster","title":"cluster: Cluster instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.Task.custom_execute_callback","title":"custom_execute_callback: Optional[Callable] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.Task.custom_task_parameters","title":"custom_task_parameters: Dict[str, str] property","text":""},{"location":"api/task/#brickflow.engine.task.Task.databricks_task_type_str","title":"databricks_task_type_str: str property","text":""},{"location":"api/task/#brickflow.engine.task.Task.depends_on","title":"depends_on: List[Union[Callable, str]] = field(default_factory=lambda : []) class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.Task.depends_on_names","title":"depends_on_names: Iterator[str] property","text":""},{"location":"api/task/#brickflow.engine.task.Task.description","title":"description: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.Task.libraries","title":"libraries: List[TaskLibrary] = field(default_factory=lambda : []) class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.Task.name","title":"name: str property","text":""},{"location":"api/task/#brickflow.engine.task.Task.parents","title":"parents: List[str] property","text":""},{"location":"api/task/#brickflow.engine.task.Task.task_func","title":"task_func: Callable instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.Task.task_func_name","title":"task_func_name: str property","text":""},{"location":"api/task/#brickflow.engine.task.Task.task_id","title":"task_id: str instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.Task.task_settings","title":"task_settings: Optional[TaskSettings] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.Task.task_type","title":"task_type: TaskType = TaskType.BRICKFLOW_TASK class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.Task.trigger_rule","title":"trigger_rule: BrickflowTriggerRule = BrickflowTriggerRule.ALL_SUCCESS class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.Task.workflow","title":"workflow: Workflow instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.Task-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.Task.execute","title":"execute() -> Any","text":"Source code in brickflow/engine/task.py
@with_brickflow_logger\ndef execute(self) -> Any:\n# Workflow is:\n#   1. Check whether tasks were selected and, if so, whether this task is among them\n#   2. Check whether the previous task was skipped and apply the trigger rule\n#   3. Check whether this is a custom python task and execute it\n#   4. Execute the task function\nctx._set_current_task(self.name)\n_select_task_skip, _select_task_skip_reason = self._skip_because_not_selected()\nif _select_task_skip is True:\n# check if this task is skipped due to task selection\n_ilog.info(\n\"Skipping task... %s for reason: %s\",\nself.name,\n_select_task_skip_reason,\n)\nctx._reset_current_task()\nreturn\n_skip, reason = self.should_skip()\nif _skip is True:\n_ilog.info(\"Skipping task... %s for reason: %s\", self.name, reason)\nctx.task_coms.put(self.name, BRANCH_SKIP_EXCEPT, SKIP_EXCEPT_HACK)\nctx._reset_current_task()\nreturn\ninitial_resp: TaskResponse = get_brickflow_tasks_hook().task_execute(\ntask=self, workflow=self.workflow\n)\nresp: TaskResponse = get_brickflow_tasks_hook().handle_results(\nresp=initial_resp, task=self, workflow=self.workflow\n)\nif resp.push_return_value is True:\nctx.task_coms.put(self.name, RETURN_VALUE_KEY, resp.response)\nctx._reset_current_task()\nreturn resp.response\n
"},{"location":"api/task/#brickflow.engine.task.Task.get_obj_dict","title":"get_obj_dict(entrypoint: str) -> Dict[str, Any]","text":"Source code in brickflow/engine/task.py
def get_obj_dict(self, entrypoint: str) -> Dict[str, Any]:\nreturn {\n\"notebook_path\": self.handle_notebook_path(entrypoint),\n\"base_parameters\": {\n**self.builtin_notebook_params,\n**self.brickflow_default_params,\n**self.custom_task_parameters,  # type: ignore\n# **(self.custom_unique_task_parameters or {}),\n# TODO: implement only after validating limit on parameters\n},\n}\n
"},{"location":"api/task/#brickflow.engine.task.Task.get_runtime_parameter_values","title":"get_runtime_parameter_values() -> Dict[str, Any]","text":"Source code in brickflow/engine/task.py
def get_runtime_parameter_values(self) -> Dict[str, Any]:\n# if dbutils returns None then return v instead\nreturn {\nk: (ctx.get_parameter(k, str(v)) or v)\nfor k, v in (\ninspect.getfullargspec(self.task_func).kwonlydefaults or {}\n).items()\n}\n
"},{"location":"api/task/#brickflow.engine.task.Task.handle_notebook_path","title":"handle_notebook_path(entrypoint: str) -> str staticmethod","text":"Source code in brickflow/engine/task.py
@staticmethod\ndef handle_notebook_path(entrypoint: str) -> str:\n# local will get created as workspace notebook job and not a git source job\nif ctx.env == BrickflowDefaultEnvs.LOCAL.value:\n# check and ensure suffix has .py extension\nreturn entrypoint if entrypoint.endswith(\".py\") else f\"{entrypoint}.py\"\nreturn entrypoint\n
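A quick illustration of the local-environment special case (the behavior depends on `ctx.env`):

```python
# When ctx.env == "local", the entrypoint is registered as a workspace notebook,
# so a .py suffix is enforced; otherwise the path passes through unchanged.
Task.handle_notebook_path("workflows/entrypoint")     # "workflows/entrypoint.py" locally
Task.handle_notebook_path("workflows/entrypoint.py")  # unchanged
```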
"},{"location":"api/task/#brickflow.engine.task.Task.is_valid_task_signature","title":"is_valid_task_signature() -> None","text":"Source code in brickflow/engine/task.py
def is_valid_task_signature(self) -> None:\n# only supports kwonlyargs with defaults\nspec: inspect.FullArgSpec = inspect.getfullargspec(self.task_func)\nsig: inspect.Signature = inspect.signature(self.task_func)\nsignature_error_msg = (\n\"Task signatures only support kwargs with defaults, or a catch-all varkw **kwargs. \"\n\"For example: def execute(*, variable_a=None, variable_b=None, **kwargs). \"\nf\"Please fix function def {self.task_func_name}{sig}: ...\"\n)\nkwargs_default_error_msg = (\nf\"Keyword arguments must be Strings. \"\nf\"Please handle booleans and numbers via strings. \"\nf\"Please fix function def {self.task_func_name}{sig}: ...\"\n)\nvalid_case = spec.args == [] and spec.varargs is None and spec.defaults is None\nfor _, v in (spec.kwonlydefaults or {}).items():\n# in python a boolean is a subtype of int; it must be captured here via short circuit\nif not (isinstance(v, str) or v is None):\nraise InvalidTaskSignatureDefinition(kwargs_default_error_msg)\nif valid_case:\nreturn\nraise InvalidTaskSignatureDefinition(signature_error_msg)\n
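In short: task functions may declare only keyword-only parameters with string (or None) defaults, optionally plus a catch-all `**kwargs`. A minimal pair of examples against the validation above:

```python
# Valid: keyword-only parameters, string/None defaults, catch-all **kwargs
def my_task(*, run_date: str = None, dry_run: str = "false", **kwargs):
    ...

# Invalid: a boolean default raises InvalidTaskSignatureDefinition
def bad_task(*, dry_run=False):
    ...
```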
"},{"location":"api/task/#brickflow.engine.task.Task.should_skip","title":"should_skip() -> Tuple[bool, Optional[str]]","text":"Source code in brickflow/engine/task.py
def should_skip(self) -> Tuple[bool, Optional[str]]:\n# return true or false and reason\nnode_skip_checks = []\nfor parent in self.parents:\nif parent != ROOT_NODE:\ntry:\ntask_to_not_skip = ctx.task_coms.get(parent, BRANCH_SKIP_EXCEPT)\nif self.name != task_to_not_skip:\n# set this task to skip hack to keep to empty to trigger failure\n# key look up will fail\nnode_skip_checks.append(True)\nelse:\nnode_skip_checks.append(False)\nexcept Exception:\n# ignore errors as it probably doesn't exist\n# TODO: log errors\nnode_skip_checks.append(False)\nif not node_skip_checks:\nreturn False, None\nif self.trigger_rule == BrickflowTriggerRule.NONE_FAILED:\n# by default a task failure automatically skips\nreturn self._get_skip_with_reason(\nall(node_skip_checks),\n\"At least one task before this was not successful\",\n)\n# default is BrickflowTriggerRule.ALL_SUCCESS\nreturn self._get_skip_with_reason(\nany(node_skip_checks), \"Not all tasks before this were successful\"\n)\n
"},{"location":"api/task/#brickflow.engine.task.EmailNotifications","title":"brickflow.engine.task.EmailNotifications dataclass","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications.on_failure","title":"on_failure: Optional[List[str]] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications.on_start","title":"on_start: Optional[List[str]] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications.on_success","title":"on_success: Optional[List[str]] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.EmailNotifications.to_tf_dict","title":"to_tf_dict() -> Dict[str, Optional[List[str]]]","text":"Source code in brickflow/engine/task.py
def to_tf_dict(self) -> Dict[str, Optional[List[str]]]:\nreturn {\n\"on_start\": self.on_start,\n\"on_failure\": self.on_failure,\n\"on_success\": self.on_success,\n}\n
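A small construction sketch; `to_tf_dict` simply mirrors the three optional fields (addresses are placeholders):

```python
notifications = EmailNotifications(
    on_failure=["oncall@example.com"],
    on_success=["team@example.com"],
)
assert notifications.to_tf_dict() == {
    "on_start": None,
    "on_failure": ["oncall@example.com"],
    "on_success": ["team@example.com"],
}
```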
"},{"location":"api/task/#brickflow.engine.task.JarTaskLibrary","title":"brickflow.engine.task.JarTaskLibrary dataclass","text":"

Bases: StorageBasedTaskLibrary

Parameters:

Name Type Description Default jar str

String path to the jar on S3/DBFS

required"},{"location":"api/task/#brickflow.engine.task.JarTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.JarTaskLibrary.jar","title":"jar: str instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.JarTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.EggTaskLibrary","title":"brickflow.engine.task.EggTaskLibrary dataclass","text":"

Bases: StorageBasedTaskLibrary

Parameters:

Name Type Description Default egg str

String path to the egg on S3/DBFS

required"},{"location":"api/task/#brickflow.engine.task.EggTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.EggTaskLibrary.egg","title":"egg: str instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.EggTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.WheelTaskLibrary","title":"brickflow.engine.task.WheelTaskLibrary dataclass","text":"

Bases: StorageBasedTaskLibrary

Parameters:

Name Type Description Default whl str

String path to the whl on S3/DBFS

required"},{"location":"api/task/#brickflow.engine.task.WheelTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.WheelTaskLibrary.whl","title":"whl: str instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.WheelTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary","title":"brickflow.engine.task.PypiTaskLibrary dataclass","text":"

Bases: TaskLibrary

Parameters:

Name Type Description Default package str

The package in PyPI, e.g. requests, requests==x.y.z, or git+https://github.com/Nike-Inc/brickflow.git

required repo Optional[str]

The repository where the package can be found. By default, PyPI is used.

None"},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary.dict","title":"dict: Dict[str, Union[str, Dict[str, str]]] property","text":""},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary.package","title":"package: str instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary.repo","title":"repo: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.PypiTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary","title":"brickflow.engine.task.MavenTaskLibrary dataclass","text":"

Bases: TaskLibrary

Parameters:

Name Type Description Default coordinates str

Gradle-style Maven coordinates. For example: org.jsoup:jsoup:1.7.2.

required repo Optional[str]

Maven repo to install the Maven package from. If omitted, both Maven Central Repository and Spark Packages are searched.

None exclusions Optional[List[str]]

List of dependencies to exclude. For example: [\"slf4j:slf4j\", \"*:hadoop-client\"]. Maven dependency exclusions: https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.

None"},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary.coordinates","title":"coordinates: str instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary.dict","title":"dict: Dict[str, Union[str, Dict[str, str]]] property","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary.exclusions","title":"exclusions: Optional[List[str]] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary.repo","title":"repo: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.MavenTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary","title":"brickflow.engine.task.CranTaskLibrary dataclass","text":"

Bases: TaskLibrary

Parameters:

Name Type Description Default package str

The name of the CRAN package to install.

required repo Optional[str]

The repository where the package can be found. If not specified, the default CRAN repo is used.

None"},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary.dict","title":"dict: Dict[str, Union[str, Dict[str, str]]] property","text":""},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary.package","title":"package: str instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary.repo","title":"repo: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.CranTaskLibrary-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.BrickflowTriggerRule","title":"brickflow.engine.task.BrickflowTriggerRule","text":"

Bases: Enum

"},{"location":"api/task/#brickflow.engine.task.BrickflowTriggerRule-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.BrickflowTriggerRule.ALL_SUCCESS","title":"ALL_SUCCESS = 'all_success' class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.BrickflowTriggerRule.NONE_FAILED","title":"NONE_FAILED = 'none_failed' class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.BrickflowTaskEnvVars","title":"brickflow.engine.task.BrickflowTaskEnvVars","text":"

Bases: Enum

"},{"location":"api/task/#brickflow.engine.task.BrickflowTaskEnvVars-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.BrickflowTaskEnvVars.BRICKFLOW_SELECT_TASKS","title":"BRICKFLOW_SELECT_TASKS = 'BRICKFLOW_SELECT_TASKS' class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings","title":"brickflow.engine.task.TaskSettings dataclass","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.email_notifications","title":"email_notifications: Optional[EmailNotifications] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.max_retries","title":"max_retries: Optional[int] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.min_retry_interval_millis","title":"min_retry_interval_millis: Optional[int] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.notification_settings","title":"notification_settings: Optional[TaskNotificationSettings] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.retry_on_timeout","title":"retry_on_timeout: Optional[bool] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.timeout_seconds","title":"timeout_seconds: Optional[int] = None class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings-functions","title":"Functions","text":""},{"location":"api/task/#brickflow.engine.task.TaskSettings.merge","title":"merge(other: Optional['TaskSettings']) -> 'TaskSettings'","text":"Source code in brickflow/engine/task.py
def merge(self, other: Optional[\"TaskSettings\"]) -> \"TaskSettings\":\n# overrides top level values\nif other is None:\nreturn self\nreturn TaskSettings(\nother.email_notifications or self.email_notifications,\nother.notification_settings or self.notification_settings,\nother.timeout_seconds or self.timeout_seconds or 0,\nother.max_retries or self.max_retries,\nother.min_retry_interval_millis or self.min_retry_interval_millis,\nother.retry_on_timeout or self.retry_on_timeout,\n)\n
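A worked example of the merge precedence: `other`'s values win field by field, falling back to `self` (and to 0 for `timeout_seconds`):

```python
workflow_defaults = TaskSettings(timeout_seconds=3600, max_retries=2)
task_override = TaskSettings(max_retries=5)

merged = workflow_defaults.merge(task_override)
# merged.max_retries == 5, merged.timeout_seconds == 3600
```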
"},{"location":"api/task/#brickflow.engine.task.TaskSettings.to_tf_dict","title":"to_tf_dict() -> Dict[str, Optional[str] | Optional[int] | Optional[bool] | Optional[Dict[str, Optional[List[str]]]]]","text":"Source code in brickflow/engine/task.py
def to_tf_dict(\nself,\n) -> Dict[\nstr,\nOptional[str]\n| Optional[int]\n| Optional[bool]\n| Optional[Dict[str, Optional[List[str]]]],\n]:\nemail_not = (\nself.email_notifications.to_tf_dict()\nif self.email_notifications is not None\nelse {}\n)\nnotification_settings = (\n{}\nif self.notification_settings is None\nelse {\"notification_settings\": self.notification_settings.dict()}\n)\nreturn {\n**notification_settings,\n\"email_notifications\": email_not,\n\"timeout_seconds\": self.timeout_seconds,\n\"max_retries\": self.max_retries,\n\"min_retry_interval_millis\": self.min_retry_interval_millis,\n\"retry_on_timeout\": self.retry_on_timeout,\n}\n
"},{"location":"api/task/#brickflow.engine.task.TaskType","title":"brickflow.engine.task.TaskType","text":"

Bases: Enum

"},{"location":"api/task/#brickflow.engine.task.TaskType-attributes","title":"Attributes","text":""},{"location":"api/task/#brickflow.engine.task.TaskType.BRICKFLOW_TASK","title":"BRICKFLOW_TASK = 'brickflow_task' class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.TaskType.CUSTOM_PYTHON_TASK","title":"CUSTOM_PYTHON_TASK = 'custom_python_task' class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.TaskType.DLT","title":"DLT = 'pipeline_task' class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.TaskType.NOTEBOOK_TASK","title":"NOTEBOOK_TASK = 'notebook_task' class-attribute instance-attribute","text":""},{"location":"api/task/#brickflow.engine.task.TaskType.SQL","title":"SQL = 'sql_task' class-attribute instance-attribute","text":""},{"location":"api/workflow/","title":"Workflow","text":""},{"location":"api/workflow/#brickflow.engine.workflow-classes","title":"Classes","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow","title":"brickflow.engine.workflow.Workflow dataclass","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow-attributes","title":"Attributes","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.active_task","title":"active_task: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.bfs_layers","title":"bfs_layers: List[str] property","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.clusters","title":"clusters: List[Cluster] = field(default_factory=lambda : []) class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.common_task_parameters","title":"common_task_parameters: Optional[Dict[str, str]] = None class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.default_cluster","title":"default_cluster: Optional[Cluster] = None class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.default_task_settings","title":"default_task_settings: TaskSettings = TaskSettings() class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.email_notifications","title":"email_notifications: Optional[WorkflowEmailNotifications] = None class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.graph","title":"graph: nx.DiGraph = field(default_factory=nx.DiGraph) class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.libraries","title":"libraries: List[TaskLibrary] = field(default_factory=lambda : []) class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.max_concurrent_runs","title":"max_concurrent_runs: int = 1 class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.max_tasks_in_workflow","title":"max_tasks_in_workflow: int = 100 class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.name","title":"name: str property","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.notification_settings","title":"notification_settings: Optional[WorkflowNotificationSettings] = None class-attribute 
instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.permissions","title":"permissions: WorkflowPermissions = WorkflowPermissions() class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.prefix","title":"prefix: str = field(default_factory=lambda : config('BRICKFLOW_WORKFLOW_PREFIX', '')) class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.run_as_service_principal","title":"run_as_service_principal: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.run_as_user","title":"run_as_user: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.schedule_quartz_expression","title":"schedule_quartz_expression: Optional[str] = None class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.suffix","title":"suffix: str = field(default_factory=lambda : config('BRICKFLOW_WORKFLOW_SUFFIX', '')) class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.tags","title":"tags: Optional[Dict[str, str]] = None class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.tasks","title":"tasks: Dict[str, Task] = field(default_factory=lambda : {}) class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.timezone","title":"timezone: str = 'UTC' class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.trigger","title":"trigger: Optional[Trigger] = None class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.unique_new_clusters","title":"unique_new_clusters: List[Cluster] property","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.webhook_notifications","title":"webhook_notifications: Optional[WorkflowWebhookNotifications] = None class-attribute instance-attribute","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow-functions","title":"Functions","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.bfs_task_iter","title":"bfs_task_iter() -> Iterator[Task]","text":"Source code in brickflow/engine/workflow.py
def bfs_task_iter(self) -> Iterator[Task]:\nfor layer in self.bfs_layers:\nfor task_key in layer:\nyield self.get_task(task_key)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.check_no_active_task","title":"check_no_active_task() -> None","text":"Source code in brickflow/engine/workflow.py
def check_no_active_task(self) -> None:\nif self.active_task is not None:\nraise AnotherActiveTaskError(\n\"You are calling another active task in another task. \"\n\"Please abstract the code more.\"\n)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.dlt_task","title":"dlt_task(task_func: Optional[Callable] = None, name: Optional[str] = None, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None) -> Callable","text":"Source code in brickflow/engine/workflow.py
def dlt_task(\nself,\ntask_func: Optional[Callable] = None,\nname: Optional[str] = None,\ndepends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None,\n) -> Callable:\nreturn self.task(task_func, name, task_type=TaskType.DLT, depends_on=depends_on)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.get_task","title":"get_task(task_id: str) -> Task","text":"Source code in brickflow/engine/workflow.py
@wraps_keyerror(TaskNotFoundError, \"Unable to find task: \")\ndef get_task(self, task_id: str) -> Task:\nreturn self.tasks[task_id]\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.notebook_task","title":"notebook_task(task_func: Optional[Callable] = None, name: Optional[str] = None, cluster: Optional[Cluster] = None, libraries: Optional[List[TaskLibrary]] = None, task_settings: Optional[TaskSettings] = None, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None) -> Callable","text":"Source code in brickflow/engine/workflow.py
def notebook_task(\nself,\ntask_func: Optional[Callable] = None,\nname: Optional[str] = None,\ncluster: Optional[Cluster] = None,\nlibraries: Optional[List[TaskLibrary]] = None,\ntask_settings: Optional[TaskSettings] = None,\ndepends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None,\n) -> Callable:\nreturn self.task(\ntask_func,\nname,\ncluster=cluster,\nlibraries=libraries,\ntask_type=TaskType.NOTEBOOK_TASK,\ntask_settings=task_settings,\ndepends_on=depends_on,\n)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.parents","title":"parents(node: str) -> Iterator","text":"Source code in brickflow/engine/workflow.py
def parents(self, node: str) -> Iterator:\nreturn self.graph.predecessors(node)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.pop_task","title":"pop_task(task_id: str) -> None","text":"Source code in brickflow/engine/workflow.py
@wraps_keyerror(TaskNotFoundError, \"Unable to find task: \")\ndef pop_task(self, task_id: str) -> None:\n# Pop from dict and graph\nself.tasks.pop(task_id)\nself.graph.remove_node(task_id)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.task","title":"task(task_func: Optional[Callable] = None, name: Optional[str] = None, cluster: Optional[Cluster] = None, libraries: Optional[List[TaskLibrary]] = None, task_type: TaskType = TaskType.BRICKFLOW_TASK, depends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None, trigger_rule: BrickflowTriggerRule = BrickflowTriggerRule.ALL_SUCCESS, custom_execute_callback: Optional[Callable] = None, task_settings: Optional[TaskSettings] = None) -> Callable","text":"Source code in brickflow/engine/workflow.py
def task(\nself,\ntask_func: Optional[Callable] = None,\nname: Optional[str] = None,\ncluster: Optional[Cluster] = None,\nlibraries: Optional[List[TaskLibrary]] = None,\ntask_type: TaskType = TaskType.BRICKFLOW_TASK,\ndepends_on: Optional[Union[Callable, str, List[Union[Callable, str]]]] = None,\ntrigger_rule: BrickflowTriggerRule = BrickflowTriggerRule.ALL_SUCCESS,\ncustom_execute_callback: Optional[Callable] = None,\ntask_settings: Optional[TaskSettings] = None,\n) -> Callable:\nif len(self.tasks) >= self.max_tasks_in_workflow:\nraise ValueError(\n\"You have reached the maximum number of tasks allowed in a databricks workflow. \"\n\"Please split your workflow into multiple workflows or raise a feature request \"\n\"with your Databricks team.\"\n)\ndef task_wrapper(f: Callable) -> Callable:\ntask_id = name or f.__name__\nself._add_task(\nf,\ntask_id,\ncluster=cluster,\ntask_type=task_type,\nlibraries=libraries,\ndepends_on=depends_on,\ntrigger_rule=trigger_rule,\ncustom_execute_callback=custom_execute_callback,\ntask_settings=task_settings,\n)\n@functools.wraps(f)\ndef func(*args, **kwargs):  # type: ignore\ntry:\nself.check_no_active_task()\nself._set_active_task(task_id)\nresp = f(*args, **kwargs)\nreturn resp\nexcept Exception as e:\nself._reset_active_task()\nraise e\nfinally:\nself._reset_active_task()\nreturn func\nif task_func is not None:\nif callable(task_func):\nreturn task_wrapper(task_func)\nelse:\nraise NoCallableTaskError(\n\"Please use task decorator against a callable function.\"\n)\nreturn task_wrapper\n
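The decorator above is the core registration path; `notebook_task` and `dlt_task` are thin wrappers over it. A usage sketch, assuming `Workflow` can be constructed positionally from its name as in the Brickflow quickstart:

```python
wf = Workflow("example-workflow")

@wf.task
def extract():
    return "raw"

# depends_on accepts a callable or a task name
@wf.task(depends_on=extract)
def transform():
    ...
```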
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.task_exists","title":"task_exists(task_id: str) -> bool","text":"Source code in brickflow/engine/workflow.py
def task_exists(self, task_id: str) -> bool:\nreturn task_id in self.tasks\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.task_iter","title":"task_iter() -> Iterator[Task]","text":"Source code in brickflow/engine/workflow.py
def task_iter(self) -> Iterator[Task]:\nfor task in self.bfs_task_iter():\nyield task\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.unique_new_clusters_dict","title":"unique_new_clusters_dict() -> List[Dict[str, Any]]","text":"Source code in brickflow/engine/workflow.py
def unique_new_clusters_dict(self) -> List[Dict[str, Any]]:\nself.validate_new_clusters_with_unique_names()\nall_unique_clusters = self.unique_new_clusters\nreturn [\n# job clusters do not need names\n{\n\"job_cluster_key\": c.name,\n\"new_cluster\": c.as_dict(remove_fields=[\"name\"]),\n}\nfor c in all_unique_clusters\n]\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Workflow.validate_new_clusters_with_unique_names","title":"validate_new_clusters_with_unique_names() -> None","text":"Source code in brickflow/engine/workflow.py
def validate_new_clusters_with_unique_names(self) -> None:\nall_unique_clusters = self.unique_new_clusters\nunique_name_list: Dict[str, Optional[str]] = {}\nduplicates = []\nfor cluster in all_unique_clusters:\nif cluster.name not in unique_name_list:\nunique_name_list[cluster.name] = None\nelse:\nduplicates.append(cluster.name)\nduplicate_list = list(set(duplicates))\nif len(duplicate_list) > 0:\nraise DuplicateClustersDefinitionError(\nf\"Found duplicate cluster definitions in your workflow: {self.name}, \"\nf\"with names: {duplicate_list}\"\n)\n
"},{"location":"api/workflow/#brickflow.engine.workflow.User","title":"brickflow.engine.workflow.User","text":"

Bases: ScimEntity

"},{"location":"api/workflow/#brickflow.engine.workflow.User-attributes","title":"Attributes","text":""},{"location":"api/workflow/#brickflow.engine.workflow.User-functions","title":"Functions","text":""},{"location":"api/workflow/#brickflow.engine.workflow.User.to_access_control","title":"to_access_control() -> Dict[str, str]","text":"Source code in brickflow/engine/workflow.py
def to_access_control(self) -> Dict[str, str]:\nreturn {\"user_name\": self.name}\n
"},{"location":"api/workflow/#brickflow.engine.workflow.Group","title":"brickflow.engine.workflow.Group","text":"

Bases: ScimEntity

"},{"location":"api/workflow/#brickflow.engine.workflow.Group-attributes","title":"Attributes","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Group-functions","title":"Functions","text":""},{"location":"api/workflow/#brickflow.engine.workflow.Group.to_access_control","title":"to_access_control() -> Dict[str, str]","text":"Source code in brickflow/engine/workflow.py
def to_access_control(self) -> Dict[str, str]:\nreturn {\"group_name\": self.name}\n
"},{"location":"api/workflow/#brickflow.engine.workflow.ServicePrincipal","title":"brickflow.engine.workflow.ServicePrincipal","text":"

Bases: ScimEntity

"},{"location":"api/workflow/#brickflow.engine.workflow.ServicePrincipal-attributes","title":"Attributes","text":""},{"location":"api/workflow/#brickflow.engine.workflow.ServicePrincipal-functions","title":"Functions","text":""},{"location":"api/workflow/#brickflow.engine.workflow.ServicePrincipal.to_access_control","title":"to_access_control() -> Dict[str, str]","text":"Source code in brickflow/engine/workflow.py
def to_access_control(self) -> Dict[str, str]:\nreturn {\"service_principal_name\": self.name}\n
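Each `ScimEntity` renders to the access-control key Databricks expects for its principal type; assuming each entity is constructed from the principal's name:

```python
User("first.last@example.com").to_access_control()  # {"user_name": "first.last@example.com"}
Group("data-platform").to_access_control()          # {"group_name": "data-platform"}
ServicePrincipal("sp-1234").to_access_control()     # {"service_principal_name": "sp-1234"}
```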
"},{"location":"api/workflow_dependency_sensor/","title":"WorkflowDependencySensor","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor-attributes","title":"Attributes","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor-classes","title":"Classes","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor","title":"brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor(databricks_host: str, databricks_secrets_scope: str, databricks_secrets_key: str, dependency_job_id: int, delta: timedelta, timeout_seconds: int, poke_interval_seconds: int = 60)","text":"

Use this sensor to make a Brickflow task wait until a run of another Databricks workflow (job) succeeds.

Example usage in your Brickflow task (see the runnable sketch after the constructor source below):

WorkflowDependencySensor(databricks_host=\"https://your_workspace_url.cloud.databricks.com\", databricks_secrets_scope=\"brickflow-demo-tobedeleted\", databricks_secrets_key=\"service_principle_id\", dependency_job_id=job_id, poke_interval_seconds=20, timeout_seconds=60, delta=timedelta(days=1))

Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py
def __init__(\nself,\ndatabricks_host: str,\ndatabricks_secrets_scope: str,\ndatabricks_secrets_key: str,\ndependency_job_id: int,\ndelta: timedelta,\ntimeout_seconds: int,\npoke_interval_seconds: int = 60,\n):\nself.databricks_host = databricks_host\nself.dependency_job_id = dependency_job_id\nself.databricks_secrets_scope = databricks_secrets_scope\nself.databricks_secrets_key = databricks_secrets_key\nself.poke_interval = poke_interval_seconds\nself.timeout = timeout_seconds\nself.delta = delta\nself.log = logging\nself.start_time = time.time()\n
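A runnable version of the docstring example, as a sketch (host, secret coordinates, and job id are placeholders):

```python
from datetime import timedelta

sensor = WorkflowDependencySensor(
    databricks_host="https://your_workspace_url.cloud.databricks.com",
    databricks_secrets_scope="brickflow-demo-tobedeleted",
    databricks_secrets_key="service_principle_id",
    dependency_job_id=123,
    delta=timedelta(days=1),
    timeout_seconds=600,
    poke_interval_seconds=20,
)
sensor.execute()  # polls until a successful upstream run is found or the timeout elapses
```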
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor-attributes","title":"Attributes","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.databricks_host","title":"databricks_host = databricks_host instance-attribute","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.databricks_secrets_key","title":"databricks_secrets_key = databricks_secrets_key instance-attribute","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.databricks_secrets_scope","title":"databricks_secrets_scope = databricks_secrets_scope instance-attribute","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.delta","title":"delta = delta instance-attribute","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.dependency_job_id","title":"dependency_job_id = dependency_job_id instance-attribute","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.log","title":"log = logging instance-attribute","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.poke_interval","title":"poke_interval = poke_interval_seconds instance-attribute","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.start_time","title":"start_time = time.time() instance-attribute","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.timeout","title":"timeout = timeout_seconds instance-attribute","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor-functions","title":"Functions","text":""},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.execute","title":"execute()","text":"Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py
def execute(self):\nsession = self.get_http_session()\nurl = f\"{self.databricks_host.rstrip('/')}/api/2.0/jobs/runs/list\"\nheaders = {\n\"Authorization\": f\"Bearer {self.get_token()}\",\n\"Content-Type\": \"application/json\",\n}\n# http://www.unixtimestampconverter.com/\nparams = {\n\"limit\": 25,\n\"job_id\": self.dependency_job_id,\n\"expand_tasks\": \"true\",\n\"start_time_from\": self.get_the_execution_date(),\n}\nwhile True:\noffset = 0\nhas_more = True\nwhile has_more is True:\nparams[\"offset\"] = offset\nresp = session.get(url, params=params, headers=headers).json()\nfor run in resp.get(\"runs\", []):\nself.log.info(\nf\"Found the run_id: {run['run_id']}, and its result_state is: {run.get('state', {}).get('result_state', None)}\"\n)\nif run.get(\"state\", {}).get(\"result_state\", None) == \"SUCCESS\":\nself.log.info(f\"Found a successful run: {run['run_id']}\")\nreturn\noffset += params[\"limit\"]\nhas_more = resp.get(\"has_more\", False)\nself.log.info(f\"This is offset: {offset}, this is has_more: {has_more}\")\nself.log.info(\"Didn't find a successful run yet\")\nif (\nself.timeout is not None\nand (time.time() - self.start_time) > self.timeout\n):\nraise WorkflowDependencySensorTimeOutException(\"The job has timed out\")\nself.log.info(f\"sleeping for: {self.poke_interval}\")\ntime.sleep(self.poke_interval)\n
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.get_http_session","title":"get_http_session() cached","text":"Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py
@functools.lru_cache(maxsize=None)\ndef get_http_session(self):\nsession = requests.Session()\nmax_retries = int(os.getenv(\"DATABRICKS_REQUEST_RETRY_COUNT\", 10))\nretries = self.get_retry_class(max_retries)(\ntotal=max_retries,\nbackoff_factor=1,\nstatus_forcelist=[500, 501, 502, 503, 504, 429],\n)\nsession.mount(\"https://\", HTTPAdapter(max_retries=retries))\nsession.mount(\"http://\", HTTPAdapter(max_retries=retries))\nreturn session\n
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.get_retry_class","title":"get_retry_class(max_retries)","text":"Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py
def get_retry_class(self, max_retries):\nfrom urllib3 import Retry\nlog = self.log\nclass LogRetry(Retry):\n\"\"\"\n        Adding extra logs before making a retry request\n        \"\"\"\ndef __init__(self, *args, **kwargs):\nif (\nkwargs.get(\"total\", None) != max_retries\nand kwargs.get(\"total\", None) > 0\n):\nlog.info(f\"Retrying with kwargs: {kwargs}\")\nsuper().__init__(*args, **kwargs)\nreturn LogRetry\n
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.get_the_execution_date","title":"get_the_execution_date() -> str","text":"Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py
def get_the_execution_date(self) -> str:\nsession = self.get_http_session()\nurl = f\"{self.databricks_host.rstrip('/')}/api/2.0/jobs/runs/get\"\nheaders = {\n\"Authorization\": f\"Bearer {self.get_token()}\",\n\"Content-Type\": \"application/json\",\n}\nrun_id = ctx.dbutils_widget_get_or_else(\"brickflow_parent_run_id\", None)\nif run_id is None:\nraise WorkflowDependencySensorException(\n\"run_id is empty, brickflow_parent_run_id parameter is not found \"\n\"or no value present\"\n)\nparams = {\"run_id\": run_id}\nresp = session.get(url, params=params, headers=headers).json()\n# Convert Unix timestamp to datetime object\nstart_time = datetime.fromtimestamp(resp[\"start_time\"] / 1000)\nexecution_date = start_time - self.delta\nself.log.info(start_time)\nself.log.info(execution_date)\nself.log.info(execution_date.strftime(\"%s\"))\nreturn execution_date.strftime(\"%s\")\n
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensor.get_token","title":"get_token() cached","text":"Source code in brickflow_plugins/databricks/workflow_dependency_sensor.py
@functools.lru_cache\ndef get_token(self):\nreturn ctx.dbutils.secrets.get(\nself.databricks_secrets_scope, self.databricks_secrets_key\n)\n
"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensorException","title":"brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensorException","text":"

Bases: Exception

"},{"location":"api/workflow_dependency_sensor/#brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensorTimeOutException","title":"brickflow_plugins.databricks.workflow_dependency_sensor.WorkflowDependencySensorTimeOutException","text":"

Bases: TimeoutError

"},{"location":"cli/reference/","title":"Commands","text":"

This page provides documentation for our command line tools.

"},{"location":"cli/reference/#bf","title":"bf","text":"

CLI for managing Databricks Workflows

Usage:

bf [OPTIONS] COMMAND [ARGS]...\n

Options:

  --version  Show the version and exit.\n  --help     Show this message and exit.\n
"},{"location":"cli/reference/#bundle","title":"bundle","text":"

CLI for proxying to the Databricks bundles CLI.

Usage:

bf bundle [OPTIONS]\n

Options:

  --help  Show this message and exit.\n
"},{"location":"cli/reference/#cdktf","title":"cdktf","text":"

CLI for proxying to the cdktf CLI.

Usage:

bf cdktf [OPTIONS]\n

Options:

  --help  Show this message and exit.\n
"},{"location":"cli/reference/#deploy","title":"deploy","text":"

CLI for deploying workflow projects.

Usage:

bf deploy [OPTIONS]\n

Options:

  --auto-approve                  Auto approve brickflow pipeline without\n                                  being prompted to approve.\n  --deploy-mode [cdktf|bundle]    Which deployment framework to use to deploy.\n                                  [default: cdktf]\n  --force-acquire-lock            Force acquire lock for databricks bundles\n                                  deploy.\n  -p, --profile TEXT              The databricks profile to use for\n                                  authenticating to databricks during\n                                  deployment.\n  --git-provider TEXT             The github provider for brickflow this is\n                                  used for configuring github on DBX jobs.\n  --git-ref TEXT                  The commit/tag/branch to use in github.\n  -r, --repo-url TEXT             The github url in which to run brickflow\n                                  with.\n  -e, --env TEXT                  Set the environment value, certain tags\n                                  [TBD] get added to the workflows based on\n                                  this value.\n  -w, --workflow TEXT             Provide the workflow file names which you\n                                  want to deploy, each file name separated by\n                                  space! Example: bf deploy -p DEFAULT -l -w\n                                  wf1.py -w wf2.py\n  -wd, --workflows-dir DIRECTORY  Provide the workflow directory that has to\n                                  be deployed\n  -l, --local-mode                Set the environment flag to local and other\n                                  components [TBD] are disabled in local mode.\n  --help                          Show this message and exit.\n
"},{"location":"cli/reference/#destroy","title":"destroy","text":"

CLI for destroying workflow projects.

Usage:

bf destroy [OPTIONS]\n

Options:

  --auto-approve                  Auto approve brickflow pipeline without\n                                  being prompted to approve.\n  --deploy-mode [cdktf|bundle]    Which deployment framework to use to deploy.\n                                  [default: cdktf]\n  --force-acquire-lock            Force acquire lock for databricks bundles\n                                  destroy.\n  -p, --profile TEXT              The databricks profile to use for\n                                  authenticating to databricks during\n                                  deployment.\n  --git-provider TEXT             The github provider for brickflow this is\n                                  used for configuring github on DBX jobs.\n  --git-ref TEXT                  The commit/tag/branch to use in github.\n  -r, --repo-url TEXT             The github url in which to run brickflow\n                                  with.\n  -e, --env TEXT                  Set the environment value, certain tags\n                                  [TBD] get added to the workflows based on\n                                  this value.\n  -w, --workflow TEXT             Provide the workflow file names which you\n                                  want to deploy, each file name separated by\n                                  space! Example: bf deploy -p DEFAULT -l -w\n                                  wf1.py -w wf2.py\n  -wd, --workflows-dir DIRECTORY  Provide the workflow directory that has to\n                                  be deployed\n  -l, --local-mode                Set the environment flag to local and other\n                                  components [TBD] are disabled in local mode.\n  --help                          Show this message and exit.\n
"},{"location":"cli/reference/#diff","title":"diff","text":"

CLI for identifying diff in projects (only cdktf supported).

Usage:

bf diff [OPTIONS]

Options:

  -p, --profile TEXT              The databricks profile to use for
                                  authenticating to databricks during
                                  deployment.
  --git-provider TEXT             The github provider for brickflow this is
                                  used for configuring github on DBX jobs.
  --git-ref TEXT                  The commit/tag/branch to use in github.
  -r, --repo-url TEXT             The github url in which to run brickflow
                                  with.
  -e, --env TEXT                  Set the environment value, certain tags
                                  [TBD] get added to the workflows based on
                                  this value.
  -w, --workflow TEXT             Provide the workflow file names which you
                                  want to deploy, each file name separated by
                                  space! Example: bf deploy -p DEFAULT -l -w
                                  wf1.py -w wf2.py
  -wd, --workflows-dir DIRECTORY  Provide the workflow directory that has to
                                  be deployed
  -l, --local-mode                Set the environment flag to local and other
                                  components [TBD] are disabled in local mode.
  --help                          Show this message and exit.
"},{"location":"cli/reference/#docs","title":"docs","text":"

Use to open docs in your browser...

Usage:

bf docs [OPTIONS]

Options:

  --help  Show this message and exit.
"},{"location":"cli/reference/#init","title":"init","text":"

Initialize your project with Brickflow...

Usage:

bf init [OPTIONS]

Options:

  -n, --project-name TEXT
  -g, --git-https-url TEXT        Provide the github URL for your project,
                                  example: https://github.com/nike-eda-
                                  apla/brickflow
  -wd, --workflows-dir DIRECTORY
  -bfv, --brickflow-version TEXT
  -sev, --spark-expectations-version TEXT
  --help                          Show this message and exit.
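
For example, a hypothetical new project could be bootstrapped like this; the project name and URL are placeholders:

bf init -n my_project -g https://github.com/org/my_project -wd workflows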
"},{"location":"cli/reference/#projects","title":"projects","text":"

Manage one to many brickflow projects

Usage:

bf projects [OPTIONS] COMMAND [ARGS]...

Options:

  --help  Show this message and exit.
"},{"location":"cli/reference/#add","title":"add","text":"

Adds a project to the brickflow-multi-project.yml file and an entrypoint.py file in the workflows dir

Usage:

bf projects add [OPTIONS]

Options:

  --name TEXT                     Name of the project
  --path-from-repo-root-to-project-root DIRECTORY
                                  Path from repo root to project root
  --path-project-root-to-workflows-dir TEXT
                                  Path from project root to workflows dir
  --deployment-mode [bundle]      Deployment mode
  -g, --git-https-url TEXT        Provide the github URL for your project,
                                  example: https://github.com/nike-eda-
                                  apla/brickflow
  -bfv, --brickflow-version TEXT
  -sev, --spark-expectations-version TEXT
  --skip-entrypoint               Skip creating entrypoint.py file
  --help                          Show this message and exit.
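
A sketch of adding a project, with placeholder values throughout:

bf projects add --name my_project \
    --path-from-repo-root-to-project-root . \
    --path-project-root-to-workflows-dir workflows \
    --deployment-mode bundle \
    -g https://github.com/org/my_project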
"},{"location":"cli/reference/#deploy_1","title":"deploy","text":"

Deploy projects in the brickflow-multi-project.yml file

Usage:

bf projects deploy [OPTIONS]

Options:

  --force-acquire-lock  Force acquire lock for databricks bundles destroy.
  --auto-approve        Auto approve brickflow pipeline without being prompted
                        to approve.
  -p, --profile TEXT    The databricks profile to use for authenticating to
                        databricks during deployment.
  --project []          Select the project of workflows you would like to
                        deploy.
  -e, --env TEXT        Set the environment value, certain tags [TBD] get
                        added to the workflows based on this value.
  --help                Show this message and exit.
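
An illustrative invocation; the project, profile, and environment names are placeholders:

bf projects deploy --project my_project -p DEFAULT -e dev --auto-approve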
"},{"location":"cli/reference/#destroy_1","title":"destroy","text":"

Destroy projects in the brickflow-multi-project.yml file

Usage:

bf projects destroy [OPTIONS]

Options:

  --force-acquire-lock  Force acquire lock for databricks bundles destroy.
  --auto-approve        Auto approve brickflow pipeline without being prompted
                        to approve.
  -p, --profile TEXT    The databricks profile to use for authenticating to
                        databricks during deployment.
  --project []          Select the project of workflows you would like to
                        deploy.
  -e, --env TEXT        Set the environment value, certain tags [TBD] get
                        added to the workflows based on this value.
  --help                Show this message and exit.
"},{"location":"cli/reference/#list","title":"list","text":"

Lists all projects in the brickflow-multi-project.yml file

Usage:

bf projects list [OPTIONS]

Options:

  --help  Show this message and exit.
"},{"location":"cli/reference/#remove","title":"remove","text":"

Removes a project from the brickflow-multi-project.yml file

Usage:

bf projects remove [OPTIONS]

Options:

  --name []  Name of the project
  --help     Show this message and exit.
"},{"location":"cli/reference/#synth","title":"synth","text":"

Synth the bundle.yml for a project

Usage:

bf projects synth [OPTIONS]

Options:

  -p, --profile TEXT  The databricks profile to use for authenticating to
                      databricks during deployment.
  --project []        Select the project of workflows you would like to
                      deploy.
  -e, --env TEXT      Set the environment value, certain tags [TBD] get added
                      to the workflows based on this value.
  --help              Show this message and exit.
"},{"location":"cli/reference/#sync","title":"sync","text":"

Synchronize your bundle tree to the Databricks workspace (only supported by the bundle deployment mode).

Usage:

bf sync [OPTIONS]

Options:

  --deploy-mode [bundle]          Which deployment framework to use to deploy.
                                  [default: bundle]
  --watch                         Enable filewatcher to sync files over.
  --full                          Run a full sync.
  --interval-duration TEXT        File system polling interval (for --watch).
  --debug TEXT                    File system polling interval (for --watch).
  -p, --profile TEXT              The databricks profile to use for
                                  authenticating to databricks during
                                  deployment.
  --git-provider TEXT             The github provider for brickflow this is
                                  used for configuring github on DBX jobs.
  --git-ref TEXT                  The commit/tag/branch to use in github.
  -r, --repo-url TEXT             The github url in which to run brickflow
                                  with.
  -e, --env TEXT                  Set the environment value, certain tags
                                  [TBD] get added to the workflows based on
                                  this value.
  -w, --workflow TEXT             Provide the workflow file names which you
                                  want to deploy, each file name separated by
                                  space! Example: bf deploy -p DEFAULT -l -w
                                  wf1.py -w wf2.py
  -wd, --workflows-dir DIRECTORY  Provide the workflow directory that has to
                                  be deployed
  -l, --local-mode                Set the environment flag to local and other
                                  components [TBD] are disabled in local mode.
  --help                          Show this message and exit.
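
For instance, to keep pushing local edits to the workspace while developing (the profile name is a placeholder):

bf sync --deploy-mode bundle -p DEFAULT --watch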
"},{"location":"faq/airflow-operator-rfc/","title":"Airflow operator rfc","text":""},{"location":"faq/airflow-operator-rfc/#airflow-operator-brickflow-support-rfc","title":"Airflow Operator - Brickflow Support RFC","text":"Airflow Operator Databricks Native Equivalent Will Implement Link to Issues Link to Impl Link to Docs Snowflake Operator Branch Python Operator Slack Operator Email Operator Task Dependency Sensor Canary Operator Bash Operator Short Circuit Operator S3 Sensor Compute Bash Operator Look at Bash Operator Compute Python Operator Use a task EMR Operator Use a task Spark Operator Use a task Python Operator Use a task Dummy Operator Use a task Genie Snowflake Operator Look at snowflake operator Genie Hive Operator N/A Genie S3 Dist CP Operator N/A Athena Operator Use DBSQL Nike EMR Operator Use a task Nike Spark Submit Operator Use a task Compute S3 Prefix Sensor Look at S3 sensor"},{"location":"faq/airflow-operator-rfc/#operators","title":"Operators","text":""},{"location":"faq/airflow-operator-rfc/#snowflake-operator","title":"Snowflake operator","text":""},{"location":"faq/airflow-operator-rfc/#branch-python-operator","title":"Branch python operator","text":""},{"location":"faq/airflow-operator-rfc/#slack-operator","title":"Slack operator","text":""},{"location":"faq/airflow-operator-rfc/#email-operator","title":"Email operator","text":""},{"location":"faq/airflow-operator-rfc/#task-dependency-sensor","title":"Task dependency sensor","text":""},{"location":"faq/airflow-operator-rfc/#bash-operator","title":"Bash operator","text":""},{"location":"faq/airflow-operator-rfc/#short-circuit-operator","title":"Short circuit operator","text":""},{"location":"faq/airflow-operator-rfc/#s3-prefix-sensor","title":"S3 Prefix Sensor","text":""},{"location":"faq/airflow-operator-rfc/#operators-which-will-not-be-supported","title":"Operators which will not be supported","text":""},{"location":"faq/airflow-operator-rfc/#compute-bash-operator","title":"Compute bash operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#compute-python-operator","title":"Compute python operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_1","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#emr-operator","title":"Emr operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_2","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#spark-operator","title":"Spark operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_3","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#python-operator","title":"Python operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_4","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#dummy-operator","title":"Dummy operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_5","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#canary-operator","title":"Canary operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_6","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#genie-snowflake-operator","title":"Genie snowflake operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_7","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#genie-hive-operator","title":"Genie hive 
operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_8","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#genie-s3-dist-cp-operator","title":"Genie s3 dist cp operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_9","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#athena-operator","title":"Athena operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_10","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#nike-emr-operator","title":"Nike emr operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_11","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#nike-spark-submit-operator","title":"Nike spark submit operator","text":""},{"location":"faq/airflow-operator-rfc/#alternative_12","title":"Alternative:","text":""},{"location":"faq/airflow-operator-rfc/#compute-s3-prefix-sensor","title":"Compute s3 prefix sensor","text":""},{"location":"faq/airflow-operator-rfc/#alternative_13","title":"Alternative:","text":""},{"location":"faq/faq/","title":"Faq","text":""}]} \ No newline at end of file diff --git a/dev/sitemap.xml b/dev/sitemap.xml deleted file mode 100644 index 7addf1bb..00000000 --- a/dev/sitemap.xml +++ /dev/null @@ -1,118 +0,0 @@ - - - - https://brickflow.readthedocs.io/en/latest/dev/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/bundles-quickstart/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/environment-variables/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/highlevel/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/limitations/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/projects/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/quickstart/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/tasks/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/workflows/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/api/airflow_external_task_dependency/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/api/airflow_native_operators/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/api/cli/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/api/compute/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/api/context/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/api/misc/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/api/project/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/api/secrets/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/api/task/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/api/workflow/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/api/workflow_dependency_sensor/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/cli/reference/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/faq/airflow-operator-rfc/ - 2023-08-02 - daily - - - https://brickflow.readthedocs.io/en/latest/dev/faq/faq/ - 2023-08-02 - daily - - \ No newline at end of file diff --git a/dev/sitemap.xml.gz b/dev/sitemap.xml.gz deleted file mode 100644 index 10ce7a87..00000000 Binary files a/dev/sitemap.xml.gz and /dev/null 
differ diff --git a/dev/tasks/index.html b/dev/tasks/index.html deleted file mode 100644 index 2b1d9110..00000000 --- a/dev/tasks/index.html +++ /dev/null @@ -1,1482 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - Tasks - BrickFlow - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Tasks

A task in Databricks workflows refers to a single unit of work that is executed as part of a larger data processing pipeline. Tasks are typically designed to perform a specific set of operations on data, such as loading data from a source, transforming it, and storing it in a destination. In Brickflow, tasks are designed so that a plain Python function can be registered as a unit of work in a workflow.

The following assumes you have already read the workflows section and created a workflow object.

Task

A Databricks workflow task can be created by decorating a Python function with Brickflow's task decorator.

task
from brickflow import Workflow

wf = Workflow(...)

@wf.task  # (1)!
def start():
    pass

@wf.task(name="custom_end")  # (2)!
def end():
    pass
1. Create a task using the decorator pattern. The task name defaults to the Python function name, so a task named "start" will be created.
2. Create a task and define the task name explicitly instead of using the function name "end". The task will be created with the name "custom_end".

Task dependency

Define a task dependency by using the "depends_on" parameter of the task decorator. Dependent tasks can be provided as direct Python callables, as strings, or as a list of callables/strings.

task_dependency
from brickflow import Workflow

wf = Workflow(...)

@wf.task
def start():
    pass

@wf.task(depends_on=start)  # (1)!
def bronze_layer():
    pass

@wf.task(depends_on="bronze_layer")  # (2)!
def x_silver():
    pass

@wf.task(depends_on=bronze_layer)
def y_silver():
    pass

@wf.task(depends_on=[x_silver, y_silver])  # (3)!
def xy_gold():
    pass

@wf.task(name="custom_z_gold", depends_on=[x_silver, "y_silver"])  # (4)!
def z_gold():
    pass

@wf.task(depends_on=["xy_gold", "custom_z_gold"])  # (5)!
def end():
    pass
1. Create a dependency on the task "start"; it is passed as a callable.
2. Create a dependency on the task "bronze_layer"; it is passed as a string.
3. Create a dependency on multiple tasks using a list of callables.
4. Create a dependency on multiple tasks using a list in which one task is a callable and the other is a string.
5. Create a dependency on multiple tasks using a list of strings. "custom_z_gold" is the explicitly defined task name - do not use "z_gold", which is the function name.
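
For reference, the depends_on arguments above produce the following task graph (edges written as upstream -> downstream):

start -> bronze_layer
bronze_layer -> x_silver, y_silver
x_silver, y_silver -> xy_gold
x_silver, y_silver -> custom_z_gold
xy_gold, custom_z_gold -> end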

Task parameters

Task parameters can be defined as key-value pairs in the definition of the function on which the task is defined.

task_parameters
from brickflow import Workflow

wf = Workflow(...)

@wf.task
def task_function(*, test="var", test1="var1"):  # (1)!
    print(test)
    print(test1)
1. To pass task-specific parameters, the parameter list needs to start with "*" (making the parameters keyword-only), followed by the key-value pairs.

Common task parameters

The workflows section shows how common task parameters are created at the workflow level. This section shows how to use them in a task.

use_common_task_parameters
from brickflow import Workflow, ctx

wf = Workflow(...)

@wf.task
def common_params():
    import some_pyspark_function  # (1)!

    catalog_env = ctx.dbutils_widget_get_or_else(key="catalog", debug="local")  # (2)!
    some_pyspark_function(catalog_env)  # (3)!
1. It is recommended to use localized imports in tasks rather than global imports.
2. Brickflow provides a context through which the defined task parameters can be fetched. Providing "debug" is mandatory; otherwise there will be a compilation error while deploying.
3. The extracted task parameter value can be used like any other Python variable. In this example, we simply pass the variable to "some_pyspark_function".

Inbuilt task parameters

There are many inbuilt task parameters that can be accessed using the Brickflow context, as above.

inbuilt_task_parameters
from brickflow import Workflow, ctx

wf = Workflow(...)

@wf.task
def inbuilt_params():
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_env",  # (1)!
        debug="local"))
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_run_id",  # (2)!
        debug="788868"))
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_job_id",  # (3)!
        debug="987987987987987"))
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_start_date",  # (4)!
        debug="2023-05-03"))
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_start_time",  # (5)!
        debug="1683102411626"))
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_task_retry_count",  # (6)!
        debug="2"))
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_parent_run_id",  # (7)!
        debug="788869"))
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_task_key",  # (8)!
        debug="inbuilt_params"))
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_internal_workflow_name",  # (9)!
        debug="Sample_Workflow"))
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_internal_task_name",  # (10)!
        debug="inbuilt_params"))
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_internal_workflow_prefix",  # (11)!
        debug="inbuilt_params"))
    print(ctx.dbutils_widget_get_or_else(
        key="brickflow_internal_workflow_suffix",  # (12)!
        debug="inbuilt_params"))
1. "brickflow_env" holds the value of the --env flag that was used when Brickflow was deployed.
2. "brickflow_run_id" holds the run id of the current task.
3. "brickflow_job_id" holds the job id of the current workflow.
4. "brickflow_start_date" holds the start date of the current workflow.
5. "brickflow_start_time" holds the start time of the current task.
6. "brickflow_task_retry_count" holds the number of retries a task can run when a failure occurs.
7. "brickflow_parent_run_id" holds the run id of the current workflow.
8. "brickflow_task_key" holds the name of the current task.
9. "brickflow_internal_workflow_name" holds the name of the current workflow.
10. "brickflow_internal_task_name" holds the name of the current task.
11. "brickflow_internal_workflow_prefix" holds the prefix used for the current workflow name.
12. "brickflow_internal_workflow_suffix" holds the suffix used for the current workflow name.

Clusters

You have the flexibility to use a different cluster for each task or to assign a custom cluster.

clusters
from brickflow import Workflow, Cluster

wf = Workflow(...)

@wf.task(cluster=Cluster(...))  # (1)!
def custom_cluster():
    pass
1. You can create a job cluster or use an existing cluster. Refer to the clusters section of the workflows page to understand how to implement each.

Libraries

You also have the flexibility to use specific libraries for a particular task.

libraries
from brickflow import Workflow

wf = Workflow(...)

@wf.task(libraries=[...])  # (1)!
def custom_libraries():
    pass
1. You can install libraries that are specific to a task. Refer to the libraries section of the workflows page to understand how to implement this.

Task types

Brickflow currently supports several task types; the default task type is NOTEBOOK.

task_types
from brickflow import Workflow, TaskType, BrickflowTriggerRule, TaskResponse

wf = Workflow(...)

@wf.task
def notebook_task():
    pass

@wf.task(task_type=TaskType.DLT)
def dlt_task():
    pass

@wf.task(
    task_type=TaskType.CUSTOM_PYTHON_TASK,  # (1)!
    trigger_rule=BrickflowTriggerRule.NONE_FAILED,  # (2)!
    custom_execute_callback=lambda x: TaskResponse(x.name,
                                                   push_return_value=True),  # (3)!
)
def custom_python_task():
    pass
1. Provide the task type to be used for this task. The default is a notebook task.
2. A trigger rule can be attached; it can be ALL_SUCCESS or NONE_FAILED. In this case, the task will be triggered if all the upstream tasks have at least run and completed.
3. A custom function to be executed as a callback. "push_return_value" assigns the output to task values; task values are comparable to XCom values in Airflow.

Trigger rules

Two types of trigger rules can be applied to a task: ALL_SUCCESS or NONE_FAILED.

trigger_rules
from brickflow import Workflow, BrickflowTriggerRule

wf = Workflow(...)

@wf.task(
    trigger_rule=BrickflowTriggerRule.NONE_FAILED  # (1)!
)
def none_failed_task():
    pass

@wf.task(
    trigger_rule=BrickflowTriggerRule.ALL_SUCCESS  # (2)!
)
def all_success_task():
    pass
1. NONE_FAILED - use this if you want to trigger the task irrespective of whether the upstream tasks succeeded or failed.
2. ALL_SUCCESS - use this if you want to trigger the task only if all the upstream tasks succeeded.

Operators

We have adopted/extended certain Airflow operators that you may need to run as tasks in Databricks workflows. For Airflow operators, you return the operator from the task function and Brickflow executes it based on the task's return type.


Bash Operator

You can use the bash operator as below.

bash_operator
from brickflow import Workflow
from brickflow_plugins import BashOperator

wf = Workflow(...)

@wf.task
def bash_task():
    return BashOperator(task_id=bash_task.__name__,
                        bash_command="ls -ltr")  # (1)!
1. Use BashOperator as you would in Airflow, but it has to be returned from the task function.

Task Dependency Sensor

Even after you migrate to Databricks workflows, Brickflow gives you the flexibility to keep a dependency on an Airflow job.

task_dependency_sensor
from brickflow import Workflow, ctx
from brickflow_plugins import TaskDependencySensor

wf = Workflow(...)

@wf.task
def airflow_external_task_dependency_sensor():
    import base64

    data = base64.b64encode(
        ctx.dbutils.secrets.get("brickflow-demo-tobedeleted", "okta_conn_id").encode(
            "utf-8"
        )
    ).decode("utf-8")
    return TaskDependencySensor(
        task_id="sensor",
        timeout=180,
        okta_conn_id=f"b64://{data}",
        external_dag_id="external_airflow_dag",
        external_task_id="hello",
        allowed_states=["success"],
        execution_delta=None,
        execution_delta_json=None,
        cluster_id="your_cluster_id",
    )
diff --git a/dev/workflows/index.html b/dev/workflows/index.html
deleted file mode 100644
index bbce9461..00000000
--- a/dev/workflows/index.html
+++ /dev/null

Workflows

A Workflow is similar to an Airflow DAG: it lets you encapsulate a set of tasks.

Here is an example of a workflow. The numbered annotations below explain each part of the workflow file.

workflow.py
from datetime import timedelta
from brickflow import Workflow, Cluster, WorkflowPermissions, User, \
    TaskSettings, EmailNotifications, PypiTaskLibrary, MavenTaskLibrary

wf = Workflow(  # (1)!
    "wf_test",  # (2)!
    default_cluster=Cluster.from_existing_cluster("your_existing_cluster_id"),  # (3)!

    # Optional parameters below
    schedule_quartz_expression="0 0/20 0 ? * * *",  # (4)!
    timezone="UTC",  # (5)!
    default_task_settings=TaskSettings(  # (6)!
        email_notifications=EmailNotifications(
            on_start=["email@nike.com"],
            on_success=["email@nike.com"],
            on_failure=["email@nike.com"]
        ),
        timeout_seconds=timedelta(hours=2).seconds
    ),
    libraries=[  # (7)!
        PypiTaskLibrary(package="requests"),
        MavenTaskLibrary(coordinates="com.cronutils:cron-utils:9.2.0"),
    ],
    tags={  # (8)!
        "product_id": "brickflow_demo",
        "slack_channel": "nike-sole-brickflow-support"
    },
    max_concurrent_runs=1,  # (9)!
    permissions=WorkflowPermissions(  # (10)!
        can_manage_run=[User("abc@abc.com")],
        can_view=[User("abc@abc.com")],
        can_manage=[User("abc@abc.com")],
    ),
    prefix="feature-jira-xxx",  # (11)!
    suffix="_qa1",  # (12)!
    common_task_parameters={  # (13)!
        "catalog": "development",
        "database": "your_database"
    },
)


@wf.task()  # (14)!
def task_function(*, test="var"):
    return "hello world"
1. The workflow definition, which constructs the workflow object.
2. Define the workflow name.
3. The default cluster used for all the tasks in the workflow. This example uses an all-purpose cluster, but you can also create a job cluster.
4. Cron expression in the Quartz format.
5. Define the timezone for your workflow. It defaults to UTC.
6. Default task settings that apply to all the tasks.
7. Libraries that need to be installed for all the tasks.
8. Tags for the resulting workflow and other objects created during deployment.
9. Define the maximum number of concurrent runs.
10. Define the permissions on the workflow.
11. Prefix for the name of the workflow.
12. Suffix for the name of the workflow.
13. Define the common task parameters that can be used in all the tasks.
14. Define a workflow task and associate it with the workflow.

Clusters

There are two ways to define the cluster for a workflow or a task.

Using an existing cluster

existing_cluster
from brickflow import Cluster

default_cluster = Cluster.from_existing_cluster("your_existing_cluster_id")

Use a job cluster

job_cluster
from brickflow import Cluster

default_cluster = Cluster(
    name="your_cluster_name",
    spark_version='11.3.x-scala2.12',
    node_type_id='m6g.xlarge',
    driver_node_type_id='m6g.xlarge',
    min_workers=1,
    max_workers=3,
    enable_elastic_disk=True,
    policy_id='your_policy_id',
    aws_attributes={
        "first_on_demand": 1,
        "availability": "SPOT_WITH_FALLBACK",
        "instance_profile_arn": "arn:aws:iam::XXXX:instance-profile/XXXX/group/XX",
        "spot_bid_price_percent": 100,
        "ebs_volume_type": "GENERAL_PURPOSE_SSD",
        "ebs_volume_count": 3,
        "ebs_volume_size": 100
    }
)

Permissions

Brickflow lets you manage permissions on workflows. Individual users, groups, or service principals can be granted the ability to manage, run, or view the workflows.

The example below is for reference.

manage_permissions
from brickflow import WorkflowPermissions, User, Group, ServicePrincipal

permissions = WorkflowPermissions(
    can_manage_run=[
        User("abc@abc.com"),
        Group("app.xyz.team.Developer"),
        ServicePrincipal("ServicePrinciple_dbx_url.app.xyz.team.Developer")
    ],
    can_view=[User("abc@abc.com")],
    can_manage=[User("abc@abc.com")],
)

Tags

Using Brickflow, custom tags can be created on the workflow, and some default tags are also created when the job is deployed.

The default tags that get automatically attached to the workflow are:

- "brickflow_project_name": the Brickflow project name, taken from the entrypoint.py file.
- "brickflow_version": the Brickflow version used to deploy the workflow.
- "databricks_tf_provider_version": the Databricks Terraform provider version used to deploy the workflow.
- "deployed_by": the email id of the profile used to deploy the workflow. It can be a user or a service principal; whichever id deploys the workflow automatically becomes its owner.
- "environment": the environment with which the workflow is identified.

Use the reference below to define more tags and attach them to the workflow. Tags can be used for collecting various metrics and building dashboards.

configure_tags
tags={
    "product_id": "brickflow_demo",
    "slack_channel": "nike-sole-brickflow-support"
}

Schedule

Databricks workflows use Quartz cron expressions, unlike Airflow's Unix-based cron scheduler. A typical Quartz cron expression has six or seven fields, separated by spaces:

second minute hour day_of_month month day_of_week year(optional)

Below is a sample:

quartz_cron_expression
schedule_quartz_expression="0 0/20 0 ? * * *"
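
Reading the sample field by field (my annotation, following standard Quartz semantics, not part of the original docs):

second       = 0
minute       = 0/20   (every 20 minutes, starting at minute 0)
hour         = 0      (only during the 00:00-00:59 hour)
day_of_month = ?      (no specific day)
month        = *
day_of_week  = *
year         = *      (optional seventh field)

So this workflow fires daily at 00:00, 00:20, and 00:40 in the workflow's configured timezone.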

Tasksettings

Task settings at the workflow level define common settings that apply to all the tasks. Below is a sample for reference; all the parameters in TaskSettings are optional.

task_settings
from datetime import timedelta
from brickflow import TaskSettings, EmailNotifications

default_task_settings = TaskSettings(
    email_notifications=EmailNotifications(
        on_start=["email@nike.com"],
        on_success=["email@nike.com"],
        on_failure=["email@nike.com"]
    ),
    timeout_seconds=timedelta(hours=2).seconds,
    max_retries=2,
    min_retry_interval_millis=60000,
    retry_on_timeout=True
)

Libraries

Brickflow allows you to specify libraries that need to be installed and used across different tasks. Libraries can be installed from a number of repositories/sources.

libraries
from brickflow import PypiTaskLibrary, MavenTaskLibrary, StorageBasedTaskLibrary, \
    JarTaskLibrary, EggTaskLibrary, WheelTaskLibrary

libraries=[
    PypiTaskLibrary(package="requests"),
    MavenTaskLibrary(coordinates="com.cronutils:cron-utils:9.2.0"),
    StorageBasedTaskLibrary("s3://..."),
    StorageBasedTaskLibrary("dbfs://..."),
    JarTaskLibrary("s3://..."),
    JarTaskLibrary("dbfs://..."),
    EggTaskLibrary("s3://..."),
    EggTaskLibrary("dbfs://..."),
    WheelTaskLibrary("s3://..."),
    WheelTaskLibrary("dbfs://..."),
]

Common task parameters

Define common parameters that can be used in all the tasks, for example a database name, a secrets_id, etc.

common_task_parameters
common_task_parameters={
    "catalog": "development",
    "database": "your_database"
}
diff --git a/versions.json b/versions.json
index 094febd0..bb1b36a1 100644
--- a/versions.json
+++ b/versions.json
@@ -1 +1 @@
-[{"version": "v0.9.0", "title": "v0.9.0", "aliases": ["latest"]}, {"version": "dev", "title": "dev", "aliases": []}]
\ No newline at end of file
+[{"version": "v0.9.0", "title": "v0.9.0", "aliases": ["latest"]}]
\ No newline at end of file