Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions databricks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,32 @@
## ref [Evanaze example repo](https://github.com/evanaze/dbx-asset-bundle-deployment)
########################################

# TODO: for any values, extract them to variable files; maybe just service principals to the resources/environments/ folder

bundle:
name: DatabricksPOC
uuid: ba682f8a-5d14-4ae7-a770-d6a359c0e835

include:
# - 'variables/*.yml' — if executed in order we might be able to put vars in folders based on headings; may affect running in the personal area before deploy?
# - resources/variables/serverless_usage_policy_constants.yml
# - resources/variables/env_serverless_usage_policy_constants.yml
- resources/variables/*.yml
- resources/environments/*.yml
- resources/**/*.yml
- resources/**/**/*.yml
# ** double asterisk doesn't seem to recurse into subdirectories
# poc should show ods ingest, reporting, and tests jobs and pipeline

# When this list gets unmanageable we may want to consider a constants file, but for simplicity here is best for now
variables:
# qqqq redeclaring so it loads them?
# serverless_usage_policies:
# type: complex
# env_serverless_usage_policies:
# type: complex


# ============================================================
# Environment Configuration
# Values that differ per deployment target (personal, dev,
Expand All @@ -51,6 +62,7 @@ variables:
default: "" # Used in dev only; staging and prod should remain empty
description: Prefix applied to developer schemas to allow multiple dev users to share the same catalog safely

# qqqq I could just use bundle.target; maybe there was a reason not to
env_name:
description: Deployment environment name (personal, dev, staging, prod)

Expand Down Expand Up @@ -137,6 +149,14 @@ targets:
is_continuous: false
pytest_marks: "not dev_skip and not freshness and not manual"
alert_emails: qqqq.can.i.grab.user.email.com
# qqqq unclear whether these should be their own vars or the dev ones — confirm whether this target uses the same settings as dev
env_serverless_usage_policies:
env_test_all_id: ${var.serverless_usage_policies.dev_test_all_id}
env_test_dq_id: ${var.serverless_usage_policies.dev_test_dq_id}
env_test_int_id: ${var.serverless_usage_policies.dev_test_int_id}
env_test_unit_id: ${var.serverless_usage_policies.dev_test_unit_id}
env_analysis_batch_id: ${var.serverless_usage_policies.dev_analysis_batch_id}
env_lh_live_id: ${var.serverless_usage_policies.dev_lh_live_id}
permissions:
- level: CAN_MANAGE
user_name: ${workspace.current_user.userName}
Expand All @@ -162,6 +182,13 @@ targets:
is_continuous: false
pytest_marks: "not dev_skip and not freshness and not manual"
alert_emails: BUNDLE_VAR_dev_alert_emails_qqqq
env_serverless_usage_policies:
env_test_all_id: ${var.serverless_usage_policies.dev_test_all_id}
env_test_dq_id: ${var.serverless_usage_policies.dev_test_dq_id}
env_test_int_id: ${var.serverless_usage_policies.dev_test_int_id}
env_test_unit_id: ${var.serverless_usage_policies.dev_test_unit_id}
env_analysis_batch_id: ${var.serverless_usage_policies.dev_analysis_batch_id}
env_lh_live_id: ${var.serverless_usage_policies.dev_lh_live_id}
permissions:
- group_name: dev_env_users
level: CAN_VIEW
Expand All @@ -185,6 +212,13 @@ targets:
storage_account: unifiedrptdeltalake
pytest_marks: "not staging_skip and not freshness and not manual"
alert_emails: BUNDLE_VAR_staging_alert_emails_qqqq
env_serverless_usage_policies:
env_test_all_id: ${var.serverless_usage_policies.staging_test_all_id}
env_test_dq_id: ${var.serverless_usage_policies.staging_test_dq_id}
env_test_int_id: ${var.serverless_usage_policies.staging_test_int_id}
env_test_unit_id: ${var.serverless_usage_policies.staging_test_unit_id}
env_analysis_batch_id: ${var.serverless_usage_policies.staging_analysis_batch_id}
env_lh_live_id: ${var.serverless_usage_policies.staging_lh_live_id}
permissions:
- group_name: staging_env_users
level: CAN_VIEW
Expand Down Expand Up @@ -220,6 +254,11 @@ targets:
storage_account: unifiedrptdeltalake
alert_emails: BUNDLE_VAR_staging_alert_emails_qqqq
# Prod will not have tests in its file system so no pytest_marks here
env_serverless_usage_policies:
env_analysis_batch_id: ${var.serverless_usage_policies.prod_analysis_batch_id}
env_lh_live_id: ${var.serverless_usage_policies.prod_lh_live_id}
# Note: You can leave test IDs blank here or map them to a default
# since Prod doesn't have test policies.
permissions:
- group_name: prod_env_users
level: CAN_VIEW
Expand Down
71 changes: 71 additions & 0 deletions docs/Cost Visibility.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Cost Visibility

## What you need to do
Todo: how to assign tagging
Todo: add reminder in pull request template
For every notebook, a tag or query? to identify it???

## Approaches
Todo: table, dashboard, budget tagging, and where triggered, when to pick the tag, what the tag means
budget policies are good for this; SQL would be used for stored procs if we continue using them

## How to view visibility
Todo: link dashboards etc encourage/invite looking at, improvements

# FYI
[budget policies limitations and delay](https://learn.microsoft.com/en-us/azure/databricks/admin/usage/budget-policies)
- Updates to tags won't be reflected in new pipeline updates if the pipeline is in Development mode. The changes take 24 hours to propagate.
- Pipelines triggered by jobs do not inherit the job's serverless budget policy. Users must set the pipeline's policy.

# What you don't need to know

## Setting up Budget Policies

00_ because it defaults to the highest alphabetically. (We should change this to 5 to give headroom)
The ```<env>_<costcentre>_<detail>```
Everyone = People

All = People + Sps

I believe we can use anchors to give these to the DAB for the SPs to use, rather than having to apply them

Service principal ones will be given via the DAB and anchors, so they are easy to update. But ones for people will need to be set in the UI.
People are given permissions through groups

The other columns are some of the tags these will have

qqqq maybe owner names and groups need to become the same. YES

**This approach means you have to apply policies, not just have them auto-applied**
**Job tasks need to have policies, and tasks need to be separate enough to have a single policy each, not span two**

Unfortunately the DAB is not currently setting permissions for SPs to use the serverless policies, so they have to be set through the UI

| Policy Name | Env | CostCentre | Test / Owner |
|---|---|---|---|
|-------Numbered / General Policies-------| |||
| 00_Dev_Dev_SP_Default | Dev | Dev | SP_Dev |
| 00_All_All_Phil_Default | All | All | Person_Phil |
| 00_Prod_Prod_SP_Default | Prod | Prod | SP_Prod |
| 00_Staging_Staging_SP_Default | Staging | Staging | SP_Staging |
| 01_All_All_PersonAll_Default | All | All | Person_All |
| All_AdhocAnalysis_All | All | AdhocAnalysis | All |
|-------Personal Policies-------| |||
| Personal_Test_All | Personal | Test | All | |
|-------Staging Policies-------| |||
| Staging_Analysis_Batch | Staging | Analysis | | |
| Staging_LH_Live | Staging | LH | | |
| Staging_Test_All | Staging | Test | All | |
| Staging_Test_DQ | Staging | Test | DQ | |
| Staging_Test_Int | Staging | Test | Int | |
| Staging_Test_Unit | Staging | Test | Unit | |
|-------Prod Policies-------| |||
| Prod_Analysis_Batch | Prod | Analysis | | |
| Prod_LH_Live | Prod | LH | | |
|-------Dev Policies-------| |||
| Dev_Analysis_Batch | Dev | Analysis | | |
| Dev_LH_Live | Dev | LH | | |
| Dev_Test_All | Dev | Test | All | |
| Dev_Test_DQ | Dev | Test | DQ | |
| Dev_Test_Int | Dev | Test | Int | |
| Dev_Test_Unit | Dev | Test | Unit | |
3 changes: 3 additions & 0 deletions docs/TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
- black linting

# Should do!
- take names out of poc
- change from service principal PATs -> research OAuth M2M via Entra ID
- had a failure on the manual dev/staging/prod validation (only in dev and staging); it seems something has expired, so is there a connector or better option than an expiring PAT?
- **IMP** separate tests requiring rerunning pipelines from those that don't (data quality, SQL SP)
- different job per test type and different notebook
- email addresses
Expand Down
86 changes: 86 additions & 0 deletions notebooks/explorations/New Notebook 2026-04-09 11_52_42.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "6bcca4f1-236f-4dde-8e0b-21281be53bf5",
"showTitle": false,
"tableResultSettingsMap": {},
"title": ""
}
},
"source": [
"# Costs"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"implicitDf": true,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "1d769d5c-5607-4744-95ed-250016ee3f51",
"showTitle": false,
"tableResultSettingsMap": {},
"title": ""
}
},
"outputs": [],
"source": [
"%sql\n"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "8781f33a-2fac-48c1-82c2-5e02c086d795",
"showTitle": false,
"tableResultSettingsMap": {},
"title": ""
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"application/vnd.databricks.v1+notebook": {
"computePreferences": null,
"dashboards": [],
"environmentMetadata": {
"base_environment": "",
"environment_version": "5"
},
"inputWidgetPreferences": null,
"language": "python",
"notebookMetadata": {
"mostRecentlyExecutedCommandWithImplicitDF": {
"commandId": 7981668629863244,
"dataframes": [
"_sqldf"
]
},
"pythonIndentUnit": 4
},
"notebookName": "New Notebook 2026-04-09 11_52_42",
"widgets": {}
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Loading
Loading