From 9eae66e29e6856bd8cecb3ebceebe39c08c4851a Mon Sep 17 00:00:00 2001
From: Pieter Noordhuis
Date: Wed, 11 Sep 2024 13:39:26 +0200
Subject: [PATCH] Demonstrate how to use a development cluster

Note: this requires https://github.com/databricks/cli/pull/1698.
---
 knowledge_base/development_cluster/README.md |  3 ++
 .../development_cluster/databricks.yml       | 37 +++++++++++++++++++
 .../resources/example_job.yml                | 22 +++++++++++
 .../development_cluster/src/hello.py         |  3 ++
 4 files changed, 65 insertions(+)
 create mode 100644 knowledge_base/development_cluster/README.md
 create mode 100644 knowledge_base/development_cluster/databricks.yml
 create mode 100644 knowledge_base/development_cluster/resources/example_job.yml
 create mode 100644 knowledge_base/development_cluster/src/hello.py

diff --git a/knowledge_base/development_cluster/README.md b/knowledge_base/development_cluster/README.md
new file mode 100644
index 0000000..304d393
--- /dev/null
+++ b/knowledge_base/development_cluster/README.md
@@ -0,0 +1,3 @@
+# Development cluster
+
+(tbd)
diff --git a/knowledge_base/development_cluster/databricks.yml b/knowledge_base/development_cluster/databricks.yml
new file mode 100644
index 0000000..dc3bcc3
--- /dev/null
+++ b/knowledge_base/development_cluster/databricks.yml
@@ -0,0 +1,37 @@
+bundle:
+  name: development_cluster
+
+include:
+  - resources/*.yml
+
+workspace:
+  host: https://e2-dogfood.staging.cloud.databricks.com
+
+targets:
+  dev:
+    mode: development
+    default: true
+
+    # By configuring this field for the "dev" target, all jobs in this bundle
+    # are overridden to use the all-purpose cluster defined below.
+    #
+    # This can increase the speed of development when iterating on code and job definitions,
+    # as you don't have to wait for job clusters to start for every job run.
+    #
+    # Note: make sure that the cluster configuration below matches the job cluster
+    # definition that will be used when deploying the other targets.
+    cluster_id: ${resources.clusters.development_cluster.id}
+
+    resources:
+      clusters:
+        development_cluster:
+          cluster_name: Development cluster
+          spark_version: 15.4.x-scala2.12
+          node_type_id: i3.xlarge
+          num_workers: 0
+          autotermination_minutes: 30
+          spark_conf:
+            "spark.databricks.cluster.profile": "singleNode"
+            "spark.master": "local[*, 4]"
+          custom_tags:
+            "ResourceClass": "SingleNode"
diff --git a/knowledge_base/development_cluster/resources/example_job.yml b/knowledge_base/development_cluster/resources/example_job.yml
new file mode 100644
index 0000000..d702cf5
--- /dev/null
+++ b/knowledge_base/development_cluster/resources/example_job.yml
@@ -0,0 +1,22 @@
+resources:
+  jobs:
+    example_job:
+      name: "Example job to demonstrate using an interactive cluster for development"
+
+      tasks:
+        - task_key: notebook
+          job_cluster_key: cluster
+          notebook_task:
+            notebook_path: ../src/hello.py
+
+      job_clusters:
+        - job_cluster_key: cluster
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: i3.xlarge
+            num_workers: 0
+            spark_conf:
+              "spark.databricks.cluster.profile": "singleNode"
+              "spark.master": "local[*, 4]"
+            custom_tags:
+              "ResourceClass": "SingleNode"
diff --git a/knowledge_base/development_cluster/src/hello.py b/knowledge_base/development_cluster/src/hello.py
new file mode 100644
index 0000000..24dc150
--- /dev/null
+++ b/knowledge_base/development_cluster/src/hello.py
@@ -0,0 +1,3 @@
+# Databricks notebook source
+
+print("Hello, World!")