From ce23f8fc41465d648111444582a87ccda9fd5e5b Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 6 Jul 2023 23:02:14 +0200 Subject: [PATCH 1/8] Update inline JSON schema documentation --- bundle/schema/docs/bundle_descriptions.json | 582 +++++++++++++++++++- 1 file changed, 574 insertions(+), 8 deletions(-) diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json index ea7804186cc..dd2e5bd2a34 100644 --- a/bundle/schema/docs/bundle_descriptions.json +++ b/bundle/schema/docs/bundle_descriptions.json @@ -20,6 +20,17 @@ "bundle": { "description": "The details for this bundle.", "properties": { + "git": { + "description": "", + "properties": { + "branch": { + "description": "" + }, + "origin_url": { + "description": "" + } + } + }, "name": { "description": "The name of the bundle." } @@ -49,6 +60,17 @@ "bundle": { "description": "The details for this bundle.", "properties": { + "git": { + "description": "", + "properties": { + "branch": { + "description": "" + }, + "origin_url": { + "description": "" + } + } + }, "name": { "description": "The name of the bundle." 
} @@ -125,6 +147,25 @@ "additionalproperties": { "description": "", "properties": { + "compute": { + "description": "", + "items": { + "description": "", + "properties": { + "compute_key": { + "description": "" + }, + "spec": { + "description": "", + "properties": { + "kind": { + "description": "" + } + } + } + } + } + }, "continuous": { "description": "", "properties": { @@ -326,6 +367,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "" + }, + "username": { + "description": "" + } + } + }, + "url": { + "description": "" + } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." }, @@ -349,6 +412,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." 
+ }, + "local_ssd_count": { + "description": "" + } + } + }, + "init_scripts": { + "description": "", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "", + "properties": { + "destination": { + "description": "" + } + } + }, + "s3": { + "description": "", + "properties": { + "canned_acl": { + "description": "" + }, + "destination": { + "description": "" + }, + "enable_encryption": { + "description": "" + }, + "encryption_type": { + "description": "" + }, + "endpoint": { + "description": "" + }, + "kms_key": { + "description": "" + }, + "region": { + "description": "" + } + } + }, + "workspace": { + "description": "", + "properties": { + "destination": { + "description": "" + } + } + } } } }, @@ -367,6 +483,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -415,6 +534,31 @@ "name": { "description": "An optional name for the job." }, + "notification_settings": { + "description": "", + "properties": { + "no_alert_for_canceled_runs": { + "description": "" + }, + "no_alert_for_skipped_runs": { + "description": "" + } + } + }, + "parameters": { + "description": "", + "items": { + "description": "", + "properties": { + "default": { + "description": "" + }, + "name": { + "description": "" + } + } + } + }, "permissions": { "description": "", "items": { @@ -435,6 +579,17 @@ } } }, + "run_as": { + "description": "", + "properties": { + "service_principal_name": { + "description": "" + }, + "user_name": { + "description": "" + } + } + }, "schedule": { "description": "An optional periodic schedule for this job. 
The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { @@ -460,6 +615,23 @@ "items": { "description": "", "properties": { + "compute_key": { + "description": "" + }, + "condition_task": { + "description": "", + "properties": { + "left": { + "description": "" + }, + "op": { + "description": "" + }, + "right": { + "description": "" + } + } + }, "dbt_task": { "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", "properties": { @@ -491,6 +663,9 @@ "items": { "description": "", "properties": { + "outcome": { + "description": "" + }, "task_key": { "description": "" } @@ -503,9 +678,6 @@ "email_notifications": { "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. The default behavior is to not send any emails.", "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, "on_failure": { "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. 
If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { @@ -722,6 +894,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "" + }, + "username": { + "description": "" + } + } + }, + "url": { + "description": "" + } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." }, @@ -745,6 +939,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "" + } + } + }, + "init_scripts": { + "description": "", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "", + "properties": { + "destination": { + "description": "" + } + } + }, + "s3": { + "description": "", + "properties": { + "canned_acl": { + "description": "" + }, + "destination": { + "description": "" + }, + "enable_encryption": { + "description": "" + }, + "encryption_type": { + "description": "" + }, + "endpoint": { + "description": "" + }, + "kms_key": { + "description": "" + }, + "region": { + "description": "" + } + } + }, + "workspace": { + "description": "", + "properties": { + "destination": { + "description": "" + } + } + } } } }, @@ -763,6 +1010,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a 
string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -819,6 +1069,20 @@ } } }, + "notification_settings": { + "description": "", + "properties": { + "alert_on_last_attempt": { + "description": "" + }, + "no_alert_for_canceled_runs": { + "description": "" + }, + "no_alert_for_skipped_runs": { + "description": "" + } + } + }, "pipeline_task": { "description": "If pipeline_task, indicates that this task must execute a Pipeline.", "properties": { @@ -856,6 +1120,9 @@ "retry_on_timeout": { "description": "An optional policy to specify whether to retry a task when it times out. The default behavior is to not retry on timeout." }, + "run_if": { + "description": "" + }, "spark_jar_task": { "description": "If spark_jar_task, indicates that this task must run a JAR.", "properties": { @@ -884,6 +1151,9 @@ }, "python_file": { "description": "" + }, + "source": { + "description": "" } } }, @@ -954,6 +1224,14 @@ } } }, + "file": { + "description": "", + "properties": { + "path": { + "description": "" + } + } + }, "parameters": { "description": "Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.", "additionalproperties": { @@ -991,7 +1269,7 @@ "file_arrival": { "description": "", "properties": { - "min_time_between_trigger_seconds": { + "min_time_between_triggers_seconds": { "description": "" }, "url": { @@ -1315,6 +1593,9 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "" } } }, @@ -1462,6 +1743,9 @@ "photon": { "description": "Whether Photon is enabled for this pipeline." 
}, + "serverless": { + "description": "" + }, "storage": { "description": "DBFS root directory for storing checkpoints and tables." }, @@ -1492,6 +1776,12 @@ } } }, + "variables": { + "description": "", + "additionalproperties": { + "description": "" + } + }, "workspace": { "description": "Configures which workspace to connect to and locations for files, state, and similar locations within the workspace file tree.", "properties": { @@ -1613,6 +1903,25 @@ "additionalproperties": { "description": "", "properties": { + "compute": { + "description": "", + "items": { + "description": "", + "properties": { + "compute_key": { + "description": "" + }, + "spec": { + "description": "", + "properties": { + "kind": { + "description": "" + } + } + } + } + } + }, "continuous": { "description": "", "properties": { @@ -1814,6 +2123,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "" + }, + "username": { + "description": "" + } + } + }, + "url": { + "description": "" + } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." }, @@ -1837,6 +2168,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." 
+ }, + "local_ssd_count": { + "description": "" + } + } + }, + "init_scripts": { + "description": "", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "", + "properties": { + "destination": { + "description": "" + } + } + }, + "s3": { + "description": "", + "properties": { + "canned_acl": { + "description": "" + }, + "destination": { + "description": "" + }, + "enable_encryption": { + "description": "" + }, + "encryption_type": { + "description": "" + }, + "endpoint": { + "description": "" + }, + "kms_key": { + "description": "" + }, + "region": { + "description": "" + } + } + }, + "workspace": { + "description": "", + "properties": { + "destination": { + "description": "" + } + } + } } } }, @@ -1855,6 +2239,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -1903,6 +2290,31 @@ "name": { "description": "An optional name for the job." }, + "notification_settings": { + "description": "", + "properties": { + "no_alert_for_canceled_runs": { + "description": "" + }, + "no_alert_for_skipped_runs": { + "description": "" + } + } + }, + "parameters": { + "description": "", + "items": { + "description": "", + "properties": { + "default": { + "description": "" + }, + "name": { + "description": "" + } + } + } + }, "permissions": { "description": "", "items": { @@ -1923,6 +2335,17 @@ } } }, + "run_as": { + "description": "", + "properties": { + "service_principal_name": { + "description": "" + }, + "user_name": { + "description": "" + } + } + }, "schedule": { "description": "An optional periodic schedule for this job. 
The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { @@ -1948,6 +2371,23 @@ "items": { "description": "", "properties": { + "compute_key": { + "description": "" + }, + "condition_task": { + "description": "", + "properties": { + "left": { + "description": "" + }, + "op": { + "description": "" + }, + "right": { + "description": "" + } + } + }, "dbt_task": { "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", "properties": { @@ -1979,6 +2419,9 @@ "items": { "description": "", "properties": { + "outcome": { + "description": "" + }, "task_key": { "description": "" } @@ -1991,9 +2434,6 @@ "email_notifications": { "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. The default behavior is to not send any emails.", "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, "on_failure": { "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. 
If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { @@ -2210,6 +2650,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "" + }, + "username": { + "description": "" + } + } + }, + "url": { + "description": "" + } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." }, @@ -2233,6 +2695,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." 
+ }, + "local_ssd_count": { + "description": "" + } + } + }, + "init_scripts": { + "description": "", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "", + "properties": { + "destination": { + "description": "" + } + } + }, + "s3": { + "description": "", + "properties": { + "canned_acl": { + "description": "" + }, + "destination": { + "description": "" + }, + "enable_encryption": { + "description": "" + }, + "encryption_type": { + "description": "" + }, + "endpoint": { + "description": "" + }, + "kms_key": { + "description": "" + }, + "region": { + "description": "" + } + } + }, + "workspace": { + "description": "", + "properties": { + "destination": { + "description": "" + } + } + } } } }, @@ -2251,6 +2766,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -2307,6 +2825,20 @@ } } }, + "notification_settings": { + "description": "", + "properties": { + "alert_on_last_attempt": { + "description": "" + }, + "no_alert_for_canceled_runs": { + "description": "" + }, + "no_alert_for_skipped_runs": { + "description": "" + } + } + }, "pipeline_task": { "description": "If pipeline_task, indicates that this task must execute a Pipeline.", "properties": { @@ -2344,6 +2876,9 @@ "retry_on_timeout": { "description": "An optional policy to specify whether to retry a task when it times out. The default behavior is to not retry on timeout." 
}, + "run_if": { + "description": "" + }, "spark_jar_task": { "description": "If spark_jar_task, indicates that this task must run a JAR.", "properties": { @@ -2372,6 +2907,9 @@ }, "python_file": { "description": "" + }, + "source": { + "description": "" } } }, @@ -2442,6 +2980,14 @@ } } }, + "file": { + "description": "", + "properties": { + "path": { + "description": "" + } + } + }, "parameters": { "description": "Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.", "additionalproperties": { @@ -2479,7 +3025,7 @@ "file_arrival": { "description": "", "properties": { - "min_time_between_trigger_seconds": { + "min_time_between_triggers_seconds": { "description": "" }, "url": { @@ -2803,6 +3349,9 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "" } } }, @@ -2950,6 +3499,9 @@ "photon": { "description": "Whether Photon is enabled for this pipeline." }, + "serverless": { + "description": "" + }, "storage": { "description": "DBFS root directory for storing checkpoints and tables." 
}, @@ -2980,6 +3532,20 @@ } } }, + "variables": { + "description": "", + "additionalproperties": { + "description": "", + "properties": { + "default": { + "description": "" + }, + "description": { + "description": "" + } + } + } + }, "workspace": { "description": "Configures which workspace to connect to and locations for files, state, and similar locations within the workspace file tree.", "properties": { From 414a6848ce55434e607dd98b520fd196f6b4cc4f Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 7 Jul 2023 01:20:08 +0200 Subject: [PATCH 2/8] Update JSON schema docs to use the latest openapi spec --- bundle/schema/docs/bundle_descriptions.json | 404 ++++++++++---------- 1 file changed, 202 insertions(+), 202 deletions(-) diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json index dd2e5bd2a34..6f546aba168 100644 --- a/bundle/schema/docs/bundle_descriptions.json +++ b/bundle/schema/docs/bundle_descriptions.json @@ -80,10 +80,10 @@ "description": "" }, "resources": { - "description": "Collection of Databricks resources to deploy.", + "description": "Specification of databricks resources to instantiate", "properties": { "experiments": { - "description": "List of MLflow experiments", + "description": "", "additionalproperties": { "description": "", "properties": { @@ -148,18 +148,18 @@ "description": "", "properties": { "compute": { - "description": "", + "description": "A list of compute requirements that can be referenced by tasks of this job.", "items": { "description": "", "properties": { "compute_key": { - "description": "" + "description": "A unique name for the compute requirement. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine the compute requirements for the task execution." }, "spec": { "description": "", "properties": { "kind": { - "description": "" + "description": "The kind of compute described by this compute specification." 
} } } @@ -167,10 +167,10 @@ } }, "continuous": { - "description": "", + "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." } } }, @@ -181,7 +181,7 @@ "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." }, "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -193,7 +193,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. 
If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -240,7 +240,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -328,7 +328,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. 
The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -377,15 +377,15 @@ "description": "", "properties": { "password": { - "description": "" + "description": "Password of the user" }, "username": { - "description": "" + "description": "Name of the user" } } }, "url": { - "description": "" + "description": "URL of the docker image." } } }, @@ -414,54 +414,54 @@ "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." }, "local_ssd_count": { - "description": "" + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." } } }, "init_scripts": { - "description": "", + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", "items": { "description": "", "properties": { "dbfs": { - "description": "", + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", "properties": { "destination": { - "description": "" + "description": "dbfs destination, e.g. 
`dbfs:/my/path`" } } }, "s3": { - "description": "", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { - "description": "" + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." }, "destination": { - "description": "" + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." }, "enable_encryption": { - "description": "" + "description": "(Optional) Flag to enable server side encryption, `false` by default." }, "encryption_type": { - "description": "" + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." }, "endpoint": { - "description": "" + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." 
}, "kms_key": { - "description": "" + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." }, "region": { - "description": "" + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." } } }, "workspace": { - "description": "", + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", "properties": { "destination": { - "description": "" + "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" } } } @@ -484,7 +484,7 @@ "description": "" }, "single_user_name": { - "description": "" + "description": "Single user name if data_security_mode is `SINGLE_USER`" }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", @@ -535,26 +535,26 @@ "description": "An optional name for the job." }, "notification_settings": { - "description": "", + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", "properties": { "no_alert_for_canceled_runs": { - "description": "" + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." }, "no_alert_for_skipped_runs": { - "description": "" + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." 
} } }, "parameters": { - "description": "", + "description": "Job-level parameter definitions", "items": { "description": "", "properties": { "default": { - "description": "" + "description": "Default value of the parameter." }, "name": { - "description": "" + "description": "The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.`" } } } @@ -583,10 +583,10 @@ "description": "", "properties": { "service_principal_name": { - "description": "" + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role." }, "user_name": { - "description": "" + "description": "The email of an active workspace user. Non-admin users can only set this field to their own email." } } }, @@ -594,7 +594,7 @@ "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Whether this trigger is paused or not." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -616,19 +616,19 @@ "description": "", "properties": { "compute_key": { - "description": "" + "description": "The key of the compute requirement, specified in `job.settings.compute`, to use for execution of this task." }, "condition_task": { - "description": "", + "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. 
Does not require a cluster to execute and does not support retries or notifications.", "properties": { "left": { - "description": "" + "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference." }, "op": { - "description": "" + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.\n" }, "right": { - "description": "" + "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference." } } }, @@ -659,15 +659,15 @@ } }, "depends_on": { - "description": "", + "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete successfully before executing this task.\nThe key is `task_key`, and the value is the name assigned to the dependent task.\n", "items": { "description": "", "properties": { "outcome": { - "description": "" + "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run." }, "task_key": { - "description": "" + "description": "The name of the task this task depends on." } } } @@ -676,10 +676,10 @@ "description": "An optional description for this task.\nThe maximum length is 4096 bytes." 
}, "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. The default behavior is to not send any emails.", + "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.", "properties": { "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED` or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -691,7 +691,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. 
If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -767,7 +767,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -855,7 +855,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. 
If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -904,15 +904,15 @@ "description": "", "properties": { "password": { - "description": "" + "description": "Password of the user" }, "username": { - "description": "" + "description": "Name of the user" } } }, "url": { - "description": "" + "description": "URL of the docker image." } } }, @@ -941,54 +941,54 @@ "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." }, "local_ssd_count": { - "description": "" + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." } } }, "init_scripts": { - "description": "", + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", "items": { "description": "", "properties": { "dbfs": { - "description": "", + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", "properties": { "destination": { - "description": "" + "description": "dbfs destination, e.g. `dbfs:/my/path`" } } }, "s3": { - "description": "", + "description": "destination and either the region or endpoint need to be provided. 
e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { - "description": "" + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." }, "destination": { - "description": "" + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." }, "enable_encryption": { - "description": "" + "description": "(Optional) Flag to enable server side encryption, `false` by default." }, "encryption_type": { - "description": "" + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." }, "endpoint": { - "description": "" + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." }, "kms_key": { - "description": "" + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." 
}, "region": { - "description": "" + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." } } }, "workspace": { - "description": "", + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", "properties": { "destination": { - "description": "" + "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" } } } @@ -1011,7 +1011,7 @@ "description": "" }, "single_user_name": { - "description": "" + "description": "Single user name if data_security_mode is `SINGLE_USER`" }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", @@ -1065,21 +1065,21 @@ "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.\n" }, "source": { - "description": "This describes an enum" + "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved\nfrom the local \u003cDatabricks\u003e workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. 
If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Notebook is located in \u003cDatabricks\u003e workspace.\n* `GIT`: Notebook is located in cloud Git provider.\n" } } }, "notification_settings": { - "description": "", + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` for this task.", "properties": { "alert_on_last_attempt": { - "description": "" + "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run." }, "no_alert_for_canceled_runs": { - "description": "" + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." }, "no_alert_for_skipped_runs": { - "description": "" + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." } } }, @@ -1121,13 +1121,13 @@ "description": "An optional policy to specify whether to retry a task when it times out. The default behavior is to not retry on timeout." }, "run_if": { - "description": "" + "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed. 
When omitted, defaults to `ALL_SUCCESS`.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies completed and at least one was executed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: All dependencies have failed\n" }, "spark_jar_task": { "description": "If spark_jar_task, indicates that this task must run a JAR.", "properties": { "jar_uri": { - "description": "Deprecated since 04/2016\\\\. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" + "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" }, "main_class_name": { "description": "The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail." @@ -1150,10 +1150,10 @@ } }, "python_file": { - "description": "" + "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." }, "source": { - "description": "" + "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved\nfrom the local \u003cDatabricks\u003e workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. 
If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Notebook is located in \u003cDatabricks\u003e workspace.\n* `GIT`: Notebook is located in cloud Git provider.\n" } } }, @@ -1225,10 +1225,10 @@ } }, "file": { - "description": "", + "description": "If file, indicates that this job runs a SQL file in a remote Git repository. Only one SQL statement is supported in a file. Multiple SQL statements separated by semicolons (;) are not permitted.", "properties": { "path": { - "description": "" + "description": "Relative path of the SQL file in the remote Git repository." } } }, @@ -1264,24 +1264,24 @@ "description": "An optional timeout applied to each run of this job. The default behavior is to have no timeout." }, "trigger": { - "description": "", + "description": "Trigger settings for the job. Can be used to trigger a run when new files arrive in an external location. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "file_arrival": { - "description": "", + "description": "File arrival trigger settings.", "properties": { "min_time_between_triggers_seconds": { - "description": "" + "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds\n" }, "url": { - "description": "" + "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location." }, "wait_after_last_change_seconds": { - "description": "" + "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. 
The\nminimum allowed value is 60 seconds.\n" } } }, "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." } } }, @@ -1327,7 +1327,7 @@ } }, "models": { - "description": "List of MLflow models", + "description": "", "additionalproperties": { "description": "", "properties": { @@ -1447,7 +1447,7 @@ "description": "", "properties": { "catalog": { - "description": "Catalog in UC to add tables to. If target is specified, tables in this pipeline will be\npublished to a \"target\" schema inside catalog (i.e. \u003ccatalog\u003e.\u003ctarget\u003e.\u003ctable\u003e)." + "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog." }, "channel": { "description": "DLT Release Channel that specifies which version to use." @@ -1507,7 +1507,7 @@ } }, "azure_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", "properties": { "availability": { "description": "" @@ -1532,7 +1532,7 @@ } }, "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. 
The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", "properties": { "dbfs": { "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", @@ -1543,7 +1543,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. 
If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -1595,7 +1595,7 @@ "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." }, "local_ssd_count": { - "description": "" + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." } } }, @@ -1603,7 +1603,7 @@ "description": "The optional ID of the instance pool to which the cluster belongs." }, "label": { - "description": "Cluster label" + "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`." }, "node_type_id": { "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" @@ -1676,18 +1676,18 @@ "description": "", "properties": { "file": { - "description": "", + "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", "properties": { "path": { - "description": "" + "description": "The absolute path of the file." } } }, "jar": { - "description": "URI of the jar to be installed. 
Currently only DBFS and S3 URIs are supported.\nFor example: `{ \"jar\": \"dbfs:/mnt/databricks/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the jar to be installed. Currently only DBFS is supported.\n" }, "maven": { - "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", + "description": "Specification of a maven library to be installed.\n", "properties": { "coordinates": { "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." @@ -1704,7 +1704,7 @@ } }, "notebook": { - "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\nFor example: `{ \"notebook\" : { \"path\" : \"/my-pipeline-notebook-path\" } }`.\nCurrently, only Scala notebooks are supported, and pipelines must be defined in a package\ncell.", + "description": "The path to a notebook that defines a pipeline and is stored in the \u003cDatabricks\u003e workspace.\n", "properties": { "path": { "description": "The absolute path of the notebook." @@ -1712,7 +1712,7 @@ } }, "whl": { - "description": "URI of the wheel to be installed.\nFor example: `{ \"whl\": \"dbfs:/my/whl\" }` or `{ \"whl\": \"s3://my-bucket/whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the wheel to be installed.\n" } } } @@ -1744,13 +1744,13 @@ "description": "Whether Photon is enabled for this pipeline." }, "serverless": { - "description": "" + "description": "Whether serverless compute is enabled for this pipeline." }, "storage": { "description": "DBFS root directory for storing checkpoints and tables." 
}, "target": { - "description": "Target schema (database) to add tables in this pipeline to." + "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`." }, "trigger": { "description": "Which pipeline trigger to use. Deprecated: Use `continuous` instead.", @@ -1836,10 +1836,10 @@ } }, "resources": { - "description": "Collection of Databricks resources to deploy.", + "description": "Specification of databricks resources to instantiate", "properties": { "experiments": { - "description": "List of MLflow experiments", + "description": "", "additionalproperties": { "description": "", "properties": { @@ -1904,18 +1904,18 @@ "description": "", "properties": { "compute": { - "description": "", + "description": "A list of compute requirements that can be referenced by tasks of this job.", "items": { "description": "", "properties": { "compute_key": { - "description": "" + "description": "A unique name for the compute requirement. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine the compute requirements for the task execution." }, "spec": { "description": "", "properties": { "kind": { - "description": "" + "description": "The kind of compute described by this compute specification." } } } @@ -1923,10 +1923,10 @@ } }, "continuous": { - "description": "", + "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." } } }, @@ -1937,7 +1937,7 @@ "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." 
}, "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED` or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -1949,7 +1949,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -1996,7 +1996,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." 
}, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -2084,7 +2084,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." 
@@ -2133,15 +2133,15 @@ "description": "", "properties": { "password": { - "description": "" + "description": "Password of the user" }, "username": { - "description": "" + "description": "Name of the user" } } }, "url": { - "description": "" + "description": "URL of the docker image." } } }, @@ -2170,54 +2170,54 @@ "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." }, "local_ssd_count": { - "description": "" + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." } } }, "init_scripts": { - "description": "", + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", "items": { "description": "", "properties": { "dbfs": { - "description": "", + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", "properties": { "destination": { - "description": "" + "description": "dbfs destination, e.g. `dbfs:/my/path`" } } }, "s3": { - "description": "", + "description": "destination and either the region or endpoint need to be provided. 
e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { - "description": "" + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." }, "destination": { - "description": "" + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." }, "enable_encryption": { - "description": "" + "description": "(Optional) Flag to enable server side encryption, `false` by default." }, "encryption_type": { - "description": "" + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." }, "endpoint": { - "description": "" + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." }, "kms_key": { - "description": "" + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." 
}, "region": { - "description": "" + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." } } }, "workspace": { - "description": "", + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", "properties": { "destination": { - "description": "" + "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" } } } @@ -2240,7 +2240,7 @@ "description": "" }, "single_user_name": { - "description": "" + "description": "Single user name if data_security_mode is `SINGLE_USER`" }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", @@ -2291,26 +2291,26 @@ "description": "An optional name for the job." }, "notification_settings": { - "description": "", + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", "properties": { "no_alert_for_canceled_runs": { - "description": "" + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." }, "no_alert_for_skipped_runs": { - "description": "" + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." } } }, "parameters": { - "description": "", + "description": "Job-level parameter definitions", "items": { "description": "", "properties": { "default": { - "description": "" + "description": "Default value of the parameter." }, "name": { - "description": "" + "description": "The name of the defined parameter. 
May only contain alphanumeric characters, `_`, `-`, and `.`" } } } @@ -2339,10 +2339,10 @@ "description": "", "properties": { "service_principal_name": { - "description": "" + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role." }, "user_name": { - "description": "" + "description": "The email of an active workspace user. Non-admin users can only set this field to their own email." } } }, @@ -2350,7 +2350,7 @@ "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Whether this trigger is paused or not." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -2372,19 +2372,19 @@ "description": "", "properties": { "compute_key": { - "description": "" + "description": "The key of the compute requirement, specified in `job.settings.compute`, to use for execution of this task." }, "condition_task": { - "description": "", + "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications.", "properties": { "left": { - "description": "" + "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference." }, "op": { - "description": "" + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. 
This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.\n" }, "right": { - "description": "" + "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference." } } }, @@ -2415,15 +2415,15 @@ } }, "depends_on": { - "description": "", + "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete successfully before executing this task.\nThe key is `task_key`, and the value is the name assigned to the dependent task.\n", "items": { "description": "", "properties": { "outcome": { - "description": "" + "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run." }, "task_key": { - "description": "" + "description": "The name of the task this task depends on." } } } @@ -2432,10 +2432,10 @@ "description": "An optional description for this task.\nThe maximum length is 4096 bytes." }, "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. The default behavior is to not send any emails.", + "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. 
The default behavior is to not send any emails.", "properties": { "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -2447,7 +2447,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -2523,7 +2523,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." 
}, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -2611,7 +2611,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." 
@@ -2660,15 +2660,15 @@ "description": "", "properties": { "password": { - "description": "" + "description": "Password of the user" }, "username": { - "description": "" + "description": "Name of the user" } } }, "url": { - "description": "" + "description": "URL of the docker image." } } }, @@ -2697,54 +2697,54 @@ "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." }, "local_ssd_count": { - "description": "" + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." } } }, "init_scripts": { - "description": "", + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", "items": { "description": "", "properties": { "dbfs": { - "description": "", + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", "properties": { "destination": { - "description": "" + "description": "dbfs destination, e.g. `dbfs:/my/path`" } } }, "s3": { - "description": "", + "description": "destination and either the region or endpoint need to be provided. 
e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { - "description": "" + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." }, "destination": { - "description": "" + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." }, "enable_encryption": { - "description": "" + "description": "(Optional) Flag to enable server side encryption, `false` by default." }, "encryption_type": { - "description": "" + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." }, "endpoint": { - "description": "" + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." }, "kms_key": { - "description": "" + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." 
}, "region": { - "description": "" + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." } } }, "workspace": { - "description": "", + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", "properties": { "destination": { - "description": "" + "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" } } } @@ -2767,7 +2767,7 @@ "description": "" }, "single_user_name": { - "description": "" + "description": "Single user name if data_security_mode is `SINGLE_USER`" }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", @@ -2821,21 +2821,21 @@ "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.\n" }, "source": { - "description": "This describes an enum" + "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved\nfrom the local \u003cDatabricks\u003e workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. 
If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Notebook is located in \u003cDatabricks\u003e workspace.\n* `GIT`: Notebook is located in cloud Git provider.\n" } } }, "notification_settings": { - "description": "", + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` for this task.", "properties": { "alert_on_last_attempt": { - "description": "" + "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run." }, "no_alert_for_canceled_runs": { - "description": "" + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." }, "no_alert_for_skipped_runs": { - "description": "" + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." } } }, @@ -2877,13 +2877,13 @@ "description": "An optional policy to specify whether to retry a task when it times out. The default behavior is to not retry on timeout." }, "run_if": { - "description": "" + "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed. 
When omitted, defaults to `ALL_SUCCESS`.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies completed and at least one was executed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: All dependencies have failed\n" }, "spark_jar_task": { "description": "If spark_jar_task, indicates that this task must run a JAR.", "properties": { "jar_uri": { - "description": "Deprecated since 04/2016\\\\. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" + "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" }, "main_class_name": { "description": "The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail." @@ -2906,10 +2906,10 @@ } }, "python_file": { - "description": "" + "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." }, "source": { - "description": "" + "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved\nfrom the local \u003cDatabricks\u003e workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. 
If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Notebook is located in \u003cDatabricks\u003e workspace.\n* `GIT`: Notebook is located in cloud Git provider.\n" } } }, @@ -2981,10 +2981,10 @@ } }, "file": { - "description": "", + "description": "If file, indicates that this job runs a SQL file in a remote Git repository. Only one SQL statement is supported in a file. Multiple SQL statements separated by semicolons (;) are not permitted.", "properties": { "path": { - "description": "" + "description": "Relative path of the SQL file in the remote Git repository." } } }, @@ -3020,24 +3020,24 @@ "description": "An optional timeout applied to each run of this job. The default behavior is to have no timeout." }, "trigger": { - "description": "", + "description": "Trigger settings for the job. Can be used to trigger a run when new files arrive in an external location. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "file_arrival": { - "description": "", + "description": "File arrival trigger settings.", "properties": { "min_time_between_triggers_seconds": { - "description": "" + "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds\n" }, "url": { - "description": "" + "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location." }, "wait_after_last_change_seconds": { - "description": "" + "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. 
The\nminimum allowed value is 60 seconds.\n" } } }, "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." } } }, @@ -3083,7 +3083,7 @@ } }, "models": { - "description": "List of MLflow models", + "description": "", "additionalproperties": { "description": "", "properties": { @@ -3203,7 +3203,7 @@ "description": "", "properties": { "catalog": { - "description": "Catalog in UC to add tables to. If target is specified, tables in this pipeline will be\npublished to a \"target\" schema inside catalog (i.e. \u003ccatalog\u003e.\u003ctarget\u003e.\u003ctable\u003e)." + "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog." }, "channel": { "description": "DLT Release Channel that specifies which version to use." @@ -3263,7 +3263,7 @@ } }, "azure_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", "properties": { "availability": { "description": "" @@ -3288,7 +3288,7 @@ } }, "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. 
The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", "properties": { "dbfs": { "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", @@ -3299,7 +3299,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. 
If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -3351,7 +3351,7 @@ "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." }, "local_ssd_count": { - "description": "" + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." } } }, @@ -3359,7 +3359,7 @@ "description": "The optional ID of the instance pool to which the cluster belongs." }, "label": { - "description": "Cluster label" + "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`." }, "node_type_id": { "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" @@ -3432,18 +3432,18 @@ "description": "", "properties": { "file": { - "description": "", + "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", "properties": { "path": { - "description": "" + "description": "The absolute path of the file." } } }, "jar": { - "description": "URI of the jar to be installed. 
Currently only DBFS and S3 URIs are supported.\nFor example: `{ \"jar\": \"dbfs:/mnt/databricks/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the jar to be installed. Currently only DBFS is supported.\n" }, "maven": { - "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", + "description": "Specification of a maven library to be installed.\n", "properties": { "coordinates": { "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." @@ -3460,7 +3460,7 @@ } }, "notebook": { - "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\nFor example: `{ \"notebook\" : { \"path\" : \"/my-pipeline-notebook-path\" } }`.\nCurrently, only Scala notebooks are supported, and pipelines must be defined in a package\ncell.", + "description": "The path to a notebook that defines a pipeline and is stored in the \u003cDatabricks\u003e workspace.\n", "properties": { "path": { "description": "The absolute path of the notebook." @@ -3468,7 +3468,7 @@ } }, "whl": { - "description": "URI of the wheel to be installed.\nFor example: `{ \"whl\": \"dbfs:/my/whl\" }` or `{ \"whl\": \"s3://my-bucket/whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the wheel to be installed.\n" } } } @@ -3500,13 +3500,13 @@ "description": "Whether Photon is enabled for this pipeline." }, "serverless": { - "description": "" + "description": "Whether serverless compute is enabled for this pipeline." }, "storage": { "description": "DBFS root directory for storing checkpoints and tables." 
}, "target": { - "description": "Target schema (database) to add tables in this pipeline to." + "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`." }, "trigger": { "description": "Which pipeline trigger to use. Deprecated: Use `continuous` instead.", From 7a613f07a5ce3fb689a2970db697d7d8ecac366c Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 7 Jul 2023 01:28:05 +0200 Subject: [PATCH 3/8] wip --- bundle/schema/openapi.go | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/bundle/schema/openapi.go b/bundle/schema/openapi.go index 6c2944aab4d..8724a0838d8 100644 --- a/bundle/schema/openapi.go +++ b/bundle/schema/openapi.go @@ -162,7 +162,7 @@ func (reader *OpenapiReader) jobsDocs() (*Docs, error) { // TODO: add description for id if needed. // Tracked in https://github.com/databricks/cli/issues/242 jobsDocs := &Docs{ - Description: "List of job definations", + Description: "List of Databricks jobs", AdditionalProperties: jobDocs, } return jobsDocs, nil @@ -177,12 +177,25 @@ func (reader *OpenapiReader) pipelinesDocs() (*Docs, error) { // TODO: Two fields in resources.Pipeline have the json tag id. Clarify the // semantics and then add a description if needed. 
(https://github.com/databricks/cli/issues/242) pipelinesDocs := &Docs{ - Description: "List of pipeline definations", + Description: "List of DLT pipelines", AdditionalProperties: pipelineDocs, } return pipelinesDocs, nil } +func (reader *OpenapiReader) experimentsDocs() (*Docs, error) { + experimentSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Experiment") + if err != nil { + return nil, err + } + experimentDocs := schemaToDocs(experimentSpecSchema) + experimentsDocs := &Docs{ + Description: "List of MLflow experiments", + AdditionalProperties: experimentDocs, + } + return experimentsDocs, nil +} + func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) { jobsDocs, err := reader.jobsDocs() if err != nil { @@ -192,12 +205,17 @@ func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) { if err != nil { return nil, err } + experimentsDocs, err := reader.experimentsDocs() + if err != nil { + return nil, err + } return &Docs{ - Description: "Specification of databricks resources to instantiate", + Description: "Collection of Databricks resources to deploy.", Properties: map[string]*Docs{ - "jobs": jobsDocs, - "pipelines": pipelinesDocs, + "jobs": jobsDocs, + "pipelines": pipelinesDocs, + "experiments": experimentsDocs, }, }, nil } From 0ff9cf07b1d22a77996ca0c10a41c0863ea6fe36 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 7 Jul 2023 01:28:59 +0200 Subject: [PATCH 4/8] - --- bundle/schema/docs/bundle_descriptions.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json index 6f546aba168..87394105a26 100644 --- a/bundle/schema/docs/bundle_descriptions.json +++ b/bundle/schema/docs/bundle_descriptions.json @@ -170,7 +170,7 @@ "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. 
Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." } } }, @@ -594,7 +594,7 @@ "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -1281,7 +1281,7 @@ } }, "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." } } }, @@ -1926,7 +1926,7 @@ "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." } } }, @@ -2350,7 +2350,7 @@ "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." 
}, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -3037,7 +3037,7 @@ } }, "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." } } }, From ad3b3d7dcc5158516c172ed646224ac8129f733c Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 7 Jul 2023 01:30:03 +0200 Subject: [PATCH 5/8] experiment-docs --- bundle/schema/docs/bundle_descriptions.json | 72 ++++++++++----------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json index 6f546aba168..50d0fce5f8c 100644 --- a/bundle/schema/docs/bundle_descriptions.json +++ b/bundle/schema/docs/bundle_descriptions.json @@ -80,30 +80,30 @@ "description": "" }, "resources": { - "description": "Specification of databricks resources to instantiate", + "description": "Collection of Databricks resources to deploy.", "properties": { "experiments": { - "description": "", + "description": "List of MLflow experiments", "additionalproperties": { "description": "", "properties": { "artifact_location": { - "description": "" + "description": "Location where artifacts for the experiment are stored." }, "creation_time": { - "description": "" + "description": "Creation time" }, "experiment_id": { - "description": "" + "description": "Unique identifier for the experiment." }, "last_update_time": { - "description": "" + "description": "Last update time" }, "lifecycle_stage": { - "description": "" + "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs." }, "name": { - "description": "" + "description": "Human readable name that identifies the experiment." 
}, "permissions": { "description": "", @@ -126,15 +126,15 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } @@ -143,7 +143,7 @@ } }, "jobs": { - "description": "List of job definations", + "description": "List of Databricks jobs", "additionalproperties": { "description": "", "properties": { @@ -170,7 +170,7 @@ "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." } } }, @@ -240,7 +240,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for only for this task.", + "description": "If new_cluster, a description of a cluster that is created for each task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -594,7 +594,7 @@ "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." 
}, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -767,7 +767,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for only for this task.", + "description": "If new_cluster, a description of a cluster that is created for each task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -1281,7 +1281,7 @@ } }, "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." } } }, @@ -1442,7 +1442,7 @@ } }, "pipelines": { - "description": "List of pipeline definations", + "description": "List of DLT pipelines", "additionalproperties": { "description": "", "properties": { @@ -1836,30 +1836,30 @@ } }, "resources": { - "description": "Specification of databricks resources to instantiate", + "description": "Collection of Databricks resources to deploy.", "properties": { "experiments": { - "description": "", + "description": "List of MLflow experiments", "additionalproperties": { "description": "", "properties": { "artifact_location": { - "description": "" + "description": "Location where artifacts for the experiment are stored." }, "creation_time": { - "description": "" + "description": "Creation time" }, "experiment_id": { - "description": "" + "description": "Unique identifier for the experiment." 
}, "last_update_time": { - "description": "" + "description": "Last update time" }, "lifecycle_stage": { - "description": "" + "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs." }, "name": { - "description": "" + "description": "Human readable name that identifies the experiment." }, "permissions": { "description": "", @@ -1882,15 +1882,15 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } @@ -1899,7 +1899,7 @@ } }, "jobs": { - "description": "List of job definations", + "description": "List of Databricks jobs", "additionalproperties": { "description": "", "properties": { @@ -1926,7 +1926,7 @@ "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." } } }, @@ -1996,7 +1996,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." 
}, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for only for this task.", + "description": "If new_cluster, a description of a cluster that is created for each task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -2350,7 +2350,7 @@ "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -2523,7 +2523,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for only for this task.", + "description": "If new_cluster, a description of a cluster that is created for each task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -3037,7 +3037,7 @@ } }, "pause_status": { - "description": "Whether this trigger is paused or not." + "description": "Indicate whether this schedule is paused or not." 
} } }, @@ -3198,7 +3198,7 @@ } }, "pipelines": { - "description": "List of pipeline definations", + "description": "List of DLT pipelines", "additionalproperties": { "description": "", "properties": { From be1c422ce659fbe7130defdaea25b61c9fd440ef Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 7 Jul 2023 01:32:50 +0200 Subject: [PATCH 6/8] added ml docs --- bundle/schema/docs/bundle_descriptions.json | 128 ++++++++++---------- bundle/schema/openapi.go | 18 +++ 2 files changed, 82 insertions(+), 64 deletions(-) diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json index 50d0fce5f8c..330b703c7d0 100644 --- a/bundle/schema/docs/bundle_descriptions.json +++ b/bundle/schema/docs/bundle_descriptions.json @@ -170,7 +170,7 @@ "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." } } }, @@ -240,7 +240,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -594,7 +594,7 @@ "description": "An optional periodic schedule for this job. 
The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -767,7 +767,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -1065,7 +1065,7 @@ "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.\n" }, "source": { - "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved\nfrom the local \u003cDatabricks\u003e workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. 
If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Notebook is located in \u003cDatabricks\u003e workspace.\n* `GIT`: Notebook is located in cloud Git provider.\n" + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local \u003cDatabricks\u003e workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a \u003cDatabricks\u003e workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n" } } }, @@ -1153,7 +1153,7 @@ "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." }, "source": { - "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved\nfrom the local \u003cDatabricks\u003e workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Notebook is located in \u003cDatabricks\u003e workspace.\n* `GIT`: Notebook is located in cloud Git provider.\n" + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local \u003cDatabricks\u003e workspace or cloud location (if the `python_file` has a URI format). 
When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a \u003cDatabricks\u003e workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n" } } }, @@ -1281,7 +1281,7 @@ } }, "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." } } }, @@ -1327,79 +1327,79 @@ } }, "models": { - "description": "", + "description": "List of MLflow models", "additionalproperties": { "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `registered_model` was created." }, "description": { - "description": "" + "description": "Description of this `registered_model`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `registered_model` was last updated." }, "latest_versions": { - "description": "", + "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", "items": { "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `model_version` was created." }, "current_stage": { - "description": "" + "description": "Current stage for this `model_version`." }, "description": { - "description": "" + "description": "Description of this `model_version`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `model_version` was last updated." }, "name": { - "description": "" + "description": "Unique name of the model" }, "run_id": { - "description": "" + "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server." 
}, "run_link": { - "description": "" + "description": "Run Link: Direct link to the run that generated this version" }, "source": { - "description": "" + "description": "URI indicating the location of the source model artifacts, used when creating `model_version`" }, "status": { - "description": "" + "description": "Current status of `model_version`" }, "status_message": { - "description": "" + "description": "Details on current `status`, if it is pending or failed." }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `model_version`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `model_version`." }, "version": { - "description": "" + "description": "Model's version number." } } } }, "name": { - "description": "" + "description": "Unique name for the model." }, "permissions": { "description": "", @@ -1422,21 +1422,21 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `registered_model`" } } } @@ -1926,7 +1926,7 @@ "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." 
} } }, @@ -1996,7 +1996,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -2350,7 +2350,7 @@ "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -2523,7 +2523,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." 
}, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -2821,7 +2821,7 @@ "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.\n" }, "source": { - "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved\nfrom the local \u003cDatabricks\u003e workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Notebook is located in \u003cDatabricks\u003e workspace.\n* `GIT`: Notebook is located in cloud Git provider.\n" + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local \u003cDatabricks\u003e workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a \u003cDatabricks\u003e workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n" } } }, @@ -2909,7 +2909,7 @@ "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. 
For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." }, "source": { - "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved\nfrom the local \u003cDatabricks\u003e workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Notebook is located in \u003cDatabricks\u003e workspace.\n* `GIT`: Notebook is located in cloud Git provider.\n" + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local \u003cDatabricks\u003e workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a \u003cDatabricks\u003e workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n" } } }, @@ -3037,7 +3037,7 @@ } }, "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." } } }, @@ -3083,79 +3083,79 @@ } }, "models": { - "description": "", + "description": "List of MLflow models", "additionalproperties": { "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `registered_model` was created." }, "description": { - "description": "" + "description": "Description of this `registered_model`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `registered_model` was last updated." 
}, "latest_versions": { - "description": "", + "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", "items": { "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `model_version` was created." }, "current_stage": { - "description": "" + "description": "Current stage for this `model_version`." }, "description": { - "description": "" + "description": "Description of this `model_version`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `model_version` was last updated." }, "name": { - "description": "" + "description": "Unique name of the model" }, "run_id": { - "description": "" + "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server." }, "run_link": { - "description": "" + "description": "Run Link: Direct link to the run that generated this version" }, "source": { - "description": "" + "description": "URI indicating the location of the source model artifacts, used when creating `model_version`" }, "status": { - "description": "" + "description": "Current status of `model_version`" }, "status_message": { - "description": "" + "description": "Details on current `status`, if it is pending or failed." }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `model_version`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `model_version`." }, "version": { - "description": "" + "description": "Model's version number." } } } }, "name": { - "description": "" + "description": "Unique name for the model." 
}, "permissions": { "description": "", @@ -3178,21 +3178,21 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `registered_model`" } } } diff --git a/bundle/schema/openapi.go b/bundle/schema/openapi.go index 8724a0838d8..9b4b27dd94e 100644 --- a/bundle/schema/openapi.go +++ b/bundle/schema/openapi.go @@ -196,6 +196,19 @@ func (reader *OpenapiReader) experimentsDocs() (*Docs, error) { return experimentsDocs, nil } +func (reader *OpenapiReader) modelsDocs() (*Docs, error) { + modelSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Model") + if err != nil { + return nil, err + } + modelDocs := schemaToDocs(modelSpecSchema) + modelsDocs := &Docs{ + Description: "List of MLflow models", + AdditionalProperties: modelDocs, + } + return modelsDocs, nil +} + func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) { jobsDocs, err := reader.jobsDocs() if err != nil { @@ -209,6 +222,10 @@ func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) { if err != nil { return nil, err } + modelsDocs, err := reader.modelsDocs() + if err != nil { + return nil, err + } return &Docs{ Description: "Collection of Databricks resources to deploy.", @@ -216,6 +233,7 @@ func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) { "jobs": jobsDocs, "pipelines": pipelinesDocs, "experiments": experimentsDocs, + "models": modelsDocs, }, }, nil } From b1f050e28d799943924710e3bddaa0a9cb85a97c Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 7 Jul 2023 01:34:40 +0200 Subject: [PATCH 7/8] - --- bundle/schema/docs/bundle_descriptions.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json index 87394105a26..75f1cb7182a 100644 --- a/bundle/schema/docs/bundle_descriptions.json +++ b/bundle/schema/docs/bundle_descriptions.json @@ -240,7 +240,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for only for this task.", + "description": "If new_cluster, a description of a cluster that is created for each task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -767,7 +767,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for only for this task.", + "description": "If new_cluster, a description of a cluster that is created for each task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -1996,7 +1996,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." 
}, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for only for this task.", + "description": "If new_cluster, a description of a cluster that is created for each task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -2523,7 +2523,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for only for this task.", + "description": "If new_cluster, a description of a cluster that is created for each task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", From 6111f822ef22fab1739e9d8dde8715a17d16136b Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 7 Jul 2023 01:42:34 +0200 Subject: [PATCH 8/8] - --- bundle/schema/docs/bundle_descriptions.json | 22 ++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json index 373b070d357..7734614eca0 100644 --- a/bundle/schema/docs/bundle_descriptions.json +++ b/bundle/schema/docs/bundle_descriptions.json @@ -170,7 +170,7 @@ "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." + "description": "Whether this trigger is paused or not." 
} } }, @@ -240,7 +240,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -594,7 +594,7 @@ "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." + "description": "Whether this trigger is paused or not." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -767,7 +767,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." 
}, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -1281,7 +1281,7 @@ } }, "pause_status": { - "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." + "description": "Whether this trigger is paused or not." } } }, @@ -1926,7 +1926,7 @@ "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." + "description": "Whether this trigger is paused or not." } } }, @@ -1996,7 +1996,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -2350,7 +2350,7 @@ "description": "An optional periodic schedule for this job. 
The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." + "description": "Whether this trigger is paused or not." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -2523,7 +2523,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -3037,7 +3037,7 @@ } }, "pause_status": { - "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED." + "description": "Whether this trigger is paused or not." } } }, @@ -3591,4 +3591,4 @@ } } } -} +} \ No newline at end of file