diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index 08edfb9da9..02bf73784d 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -107,11 +107,20 @@ jobs:
         run: npm install -g ajv-cli@5.0.0
 
       # Assert that the generated bundle schema is a valid JSON schema by using
-      # ajv-cli to validate it against a sample configuration file.
+      # ajv-cli to validate it against bundle configuration files.
       # By default the ajv-cli runs in strict mode which will fail if the schema
       # itself is not valid. Strict mode is more strict than the JSON schema
       # specification. See for details: https://ajv.js.org/options.html#strict-mode-options
       - name: Validate bundle schema
         run: |
           go run main.go bundle schema > schema.json
-          ajv -s schema.json -d ./bundle/tests/basic/databricks.yml
+
+          # Every file under testdata/pass must validate against the schema.
+          for file in ./bundle/internal/schema/testdata/pass/*.yml; do
+            ajv test -s schema.json -d "$file" --valid
+          done
+
+          # Every file under testdata/fail must be rejected by the schema.
+          for file in ./bundle/internal/schema/testdata/fail/*.yml; do
+            ajv test -s schema.json -d "$file" --invalid
+          done
diff --git a/bundle/internal/schema/testdata/fail/basic.yml b/bundle/internal/schema/testdata/fail/basic.yml
new file mode 100644
index 0000000000..5ab981e3c5
--- /dev/null
+++ b/bundle/internal/schema/testdata/fail/basic.yml
@@ -0,0 +1,3 @@
+bundle:
+  # expected type is 'string'
+  name: 1234
diff --git a/bundle/internal/schema/testdata/fail/invalid_enum_value_in_job.yml b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_job.yml
new file mode 100644
index 0000000000..92b1e9fcf3
--- /dev/null
+++ b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_job.yml
@@ -0,0 +1,4 @@
+resources:
+  jobs:
+    myjob:
+      format: INVALID_VALUE
diff --git a/bundle/internal/schema/testdata/fail/invalid_enum_value_in_model.yml b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_model.yml
new file mode 100644
index 0000000000..278b238f46
--- /dev/null
+++ b/bundle/internal/schema/testdata/fail/invalid_enum_value_in_model.yml
@@ -0,0 +1,6 @@
+resources:
+  models:
+    mymodel:
+      latest_versions:
+        - creation_timestamp: 123
+          status: INVALID_VALUE
diff --git a/bundle/internal/schema/testdata/fail/invalid_reference_in_job.yml b/bundle/internal/schema/testdata/fail/invalid_reference_in_job.yml
new file mode 100644
index 0000000000..2e0e2d84f4
--- /dev/null
+++ b/bundle/internal/schema/testdata/fail/invalid_reference_in_job.yml
@@ -0,0 +1,8 @@
+resources:
+  jobs:
+    outer:
+      name: outer job
+      tasks:
+        - task_key: run job task 1
+          run_job_task:
+            job_id: ${invalid.reference}
diff --git a/bundle/internal/schema/testdata/fail/invalid_reference_in_model.yml b/bundle/internal/schema/testdata/fail/invalid_reference_in_model.yml
new file mode 100644
index 0000000000..899d6d8500
--- /dev/null
+++ b/bundle/internal/schema/testdata/fail/invalid_reference_in_model.yml
@@ -0,0 +1,5 @@
+resources:
+  models:
+    mymodel:
+      latest_versions:
+        - creation_timestamp: ${invalid.reference}
diff --git a/bundle/internal/schema/testdata/fail/required_field_missing_in_job.yml b/bundle/internal/schema/testdata/fail/required_field_missing_in_job.yml
new file mode 100644
index 0000000000..ebb8ecf6bf
--- /dev/null
+++ b/bundle/internal/schema/testdata/fail/required_field_missing_in_job.yml
@@ -0,0 +1,9 @@
+resources:
+  jobs:
+    foo:
+      name: my job
+      tasks:
+        # All tasks need to have a task_key.
+        - notebook_task:
+            notebook_path: /Users/abc/notebooks/inner
+          existing_cluster_id: abcd
diff --git a/bundle/internal/schema/testdata/fail/unknown_field_in_job.yml b/bundle/internal/schema/testdata/fail/unknown_field_in_job.yml
new file mode 100644
index 0000000000..7e7e0d2d37
--- /dev/null
+++ b/bundle/internal/schema/testdata/fail/unknown_field_in_job.yml
@@ -0,0 +1,5 @@
+resources:
+  jobs:
+    myjob:
+      # unknown fields should cause schema failure.
+      unknown_field: "value"
diff --git a/bundle/internal/schema/testdata/fail/unknown_field_in_model.yml b/bundle/internal/schema/testdata/fail/unknown_field_in_model.yml
new file mode 100644
index 0000000000..a00c20935a
--- /dev/null
+++ b/bundle/internal/schema/testdata/fail/unknown_field_in_model.yml
@@ -0,0 +1,6 @@
+resources:
+  models:
+    mymodel:
+      creation_timestamp: 123
+      description: "my model"
+      unknown: "value"
diff --git a/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml b/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml
new file mode 100644
index 0000000000..e8a8866bc2
--- /dev/null
+++ b/bundle/internal/schema/testdata/fail/unknown_top_level_field.yml
@@ -0,0 +1 @@
+unknown: value
diff --git a/bundle/internal/schema/testdata/pass/artifact_references.yml b/bundle/internal/schema/testdata/pass/artifact_references.yml
new file mode 100644
index 0000000000..c9b137633e
--- /dev/null
+++ b/bundle/internal/schema/testdata/pass/artifact_references.yml
@@ -0,0 +1,11 @@
+artifacts:
+  abc:
+    path: /Workspace/a/b/c
+    type: wheel
+    files:
+      - source: ./x.whl
+
+resources:
+  jobs:
+    foo:
+      name: ${artifacts.abc.type}
diff --git a/bundle/internal/schema/testdata/pass/basic.yml b/bundle/internal/schema/testdata/pass/basic.yml
new file mode 100644
index 0000000000..de02d20bc6
--- /dev/null
+++ b/bundle/internal/schema/testdata/pass/basic.yml
@@ -0,0 +1,2 @@
+bundle:
+  name: basic
diff --git a/bundle/internal/schema/testdata/pass/direct_value_in_target.yml b/bundle/internal/schema/testdata/pass/direct_value_in_target.yml
new file mode 100644
index 0000000000..5033d8cd9f
--- /dev/null
+++ b/bundle/internal/schema/testdata/pass/direct_value_in_target.yml
@@ -0,0 +1,4 @@
+targets:
+  development:
+    variables:
+      myvar: value
diff --git a/bundle/internal/schema/testdata/pass/job.yml b/bundle/internal/schema/testdata/pass/job.yml
new file mode 100644
index 0000000000..d9b0e832f2
--- /dev/null
+++ b/bundle/internal/schema/testdata/pass/job.yml
@@ -0,0 +1,63 @@
+bundle:
+  name: a job
+
+workspace:
+  host: "https://myworkspace.com"
+  root_path: /abc
+
+presets:
+  name_prefix: "[DEV]"
+  jobs_max_concurrent_runs: 10
+
+variables:
+  simplevar:
+    default: true
+    description: "simplevar description"
+
+  complexvar:
+    default:
+      key1: value1
+      key2: value2
+      key3:
+        - value3
+        - value4
+    description: "complexvar description"
+
+run_as:
+  service_principal_name: myserviceprincipal
+
+resources:
+  jobs:
+    myjob:
+      name: myjob
+      continuous:
+        pause_status: PAUSED
+      edit_mode: EDITABLE
+      max_concurrent_runs: 10
+      description: "my job description"
+      email_notifications:
+        no_alert_for_skipped_runs: true
+      environments:
+        - environment_key: venv
+          spec:
+            dependencies:
+              - python=3.7
+            client: "myclient"
+      format: MULTI_TASK
+      tags:
+        foo: bar
+        bar: baz
+      tasks:
+        - task_key: mytask
+          notebook_task:
+            notebook_path: ${var.simplevar}
+          existing_cluster_id: abcd
+        - task_key: mytask2
+          for_each_task:
+            inputs: av
+            concurrency: 10
+            task:
+              task_key: inside_for_each
+              notebook_task:
+                notebook_path: ${var.complexvar.key3[0]}
+        - ${var.complexvar}
diff --git a/bundle/internal/schema/testdata/pass/ml.yml b/bundle/internal/schema/testdata/pass/ml.yml
new file mode 100644
index 0000000000..b1558f101f
--- /dev/null
+++ b/bundle/internal/schema/testdata/pass/ml.yml
@@ -0,0 +1,72 @@
+bundle:
+  name: ML
+
+workspace:
+  host: "https://myworkspace.com"
+  root_path: /abc
+
+presets:
+  name_prefix: "[DEV]"
+  jobs_max_concurrent_runs: 10
+
+variables:
+  simplevar:
+    default: "true"
+    description: "simplevar description"
+
+  complexvar:
+    default:
+      key1: value1
+      key2: value2
+      key3:
+        - value3
+        - value4
+    description: "complexvar description"
+
+resources:
+  models:
+    mymodel:
+      creation_timestamp: 123
+      description: "my model"
+      latest_versions:
+        - creation_timestamp: 123
+          tags: ${var.complexvar.key1}
+          status: READY
+      permissions:
+        - service_principal_name: myserviceprincipal
+          level: CAN_MANAGE
+
+  experiments:
+    myexperiment:
+      artifact_location: /dbfs/myexperiment
+      last_update_time: ${var.complexvar.key2}
+      lifecycle_stage: ${var.simplevar}
+      permissions:
+        - service_principal_name: myserviceprincipal
+          level: CAN_MANAGE
+
+  model_serving_endpoints:
+    myendpoint:
+      config:
+        served_models:
+          - model_name: ${resources.models.mymodel.name}
+            model_version: abc
+            scale_to_zero_enabled: true
+            workload_size: Large
+      name: myendpoint
+
+  schemas:
+    myschema:
+      catalog_name: mycatalog
+      name: myschema
+
+  registered_models:
+    myregisteredmodel:
+      catalog_name: mycatalog
+      name: myregisteredmodel
+      schema_name: ${resources.schemas.myschema.name}
+      grants:
+        - principal: abcd
+          privileges:
+            - SELECT
+            - INSERT
diff --git a/bundle/internal/schema/testdata/pass/pipeline.yml b/bundle/internal/schema/testdata/pass/pipeline.yml
new file mode 100644
index 0000000000..1b2b1a10f0
--- /dev/null
+++ b/bundle/internal/schema/testdata/pass/pipeline.yml
@@ -0,0 +1,54 @@
+bundle:
+  name: a pipeline
+
+workspace:
+  host: "https://myworkspace.com"
+  root_path: /abc
+
+presets:
+  name_prefix: "[DEV]"
+  jobs_max_concurrent_runs: 10
+
+variables:
+  simplevar:
+    default: true
+    description: "simplevar description"
+
+  complexvar:
+    default:
+      key1: value1
+      key2: value2
+      key3:
+        - value3
+        - value4
+    description: "complexvar description"
+
+artifacts:
+  mywheel:
+    path: ./mywheel.whl
+    type: WHEEL
+
+run_as:
+  service_principal_name: myserviceprincipal
+
+resources:
+  jobs:
+    myjob:
+      name: myjob
+      tasks:
+        - task_key: ${bundle.name} pipeline trigger
+          pipeline_task:
+            pipeline_id: ${resources.mypipeline.id}
+
+  pipelines:
+    mypipeline:
+      name: mypipeline
+      libraries:
+        - whl: ./mywheel.whl
+      catalog: ${var.complexvar.key2}
+      development: true
+      clusters:
+        - autoscale:
+            mode: ENHANCED
+            max_workers: 10
+            min_workers: 1
diff --git a/bundle/internal/schema/testdata/pass/quality_monitor.yml b/bundle/internal/schema/testdata/pass/quality_monitor.yml
new file mode 100644
index 0000000000..a9be593298
--- /dev/null
+++ b/bundle/internal/schema/testdata/pass/quality_monitor.yml
@@ -0,0 +1,16 @@
+bundle:
+  name: quality_monitor
+
+resources:
+  quality_monitors:
+    myqualitymonitor:
+      inference_log:
+        granularities:
+          - a
+          - b
+        model_id_col: a
+        prediction_col: b
+        timestamp_col: c
+        problem_type: PROBLEM_TYPE_CLASSIFICATION
+      assets_dir: /dbfs/mnt/abc
+      output_schema_name: default
diff --git a/bundle/internal/schema/testdata/pass/run_job_task.yml b/bundle/internal/schema/testdata/pass/run_job_task.yml
new file mode 100644
index 0000000000..be2ca22cd6
--- /dev/null
+++ b/bundle/internal/schema/testdata/pass/run_job_task.yml
@@ -0,0 +1,56 @@
+bundle:
+  name: a run job task
+  databricks_cli_version: 0.200.0
+  compute_id: "mycompute"
+
+
+variables:
+  simplevar:
+    default: 5678
+    description: "simplevar description"
+
+  complexvar:
+    default:
+      key1: 1234
+      key2: value2
+      key3:
+        - value3
+        - 9999
+    description: "complexvar description"
+
+resources:
+  jobs:
+    inner:
+      permissions:
+        - user_name: user1
+          level: CAN_MANAGE
+
+      name: inner job
+      tasks:
+        - task_key: inner notebook task
+          notebook_task:
+            notebook_path: /Users/abc/notebooks/inner
+          existing_cluster_id: abcd
+
+    outer:
+      name: outer job
+      tasks:
+        - task_key: run job task 1
+          run_job_task:
+            job_id: 1234
+
+        - task_key: run job task 2
+          run_job_task:
+            job_id: ${var.complexvar.key1}
+
+        - task_key: run job task 3
+          run_job_task:
+            job_id: ${var.simplevar}
+
+        - task_key: run job task 4
+          run_job_task:
+            job_id: ${resources.inner.id}
+
+        - task_key: run job task 5
+          run_job_task:
+            job_id: ${var.complexvar.key3[1]}
diff --git a/bundle/internal/schema/testdata/pass/schema.yml b/bundle/internal/schema/testdata/pass/schema.yml
new file mode 100644
index 0000000000..37d0f6f7a4
--- /dev/null
+++ b/bundle/internal/schema/testdata/pass/schema.yml
@@ -0,0 +1,24 @@
+bundle:
+  name: basic
+
+variables:
+  complexvar:
+    default:
+      key1: 1234
+      key2: value2
+      key3:
+        - value3
+        - 9999
+    description: complexvar description
+
+resources:
+  schemas:
+    myschema:
+      name: myschema
+      catalog_name: main
+      grants:
+        - ${var.complexvar}
+        - principal: ${workspace.current_user.me}
+          privileges:
+            - ${var.complexvar.key3[0]}
+            - ${var.complexvar.key2}