Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
776618d
Merge pull request #1 from TechnologyEnhancedLearning/poc-dab-workflow
Phil-NHS Dec 8, 2025
89c5620
Merge pull request #2 from TechnologyEnhancedLearning/staging-data-te…
Phil-NHS Dec 8, 2025
e5b24f4
Merge pull request #3 from TechnologyEnhancedLearning/poc-dab-workflow
Phil-NHS Dec 8, 2025
565e565
Merge pull request #4 from TechnologyEnhancedLearning/poc-dab-workflow
Phil-NHS Dec 8, 2025
43ccf5b
Merge pull request #5 from TechnologyEnhancedLearning/poc-dab-workflow
Phil-NHS Dec 8, 2025
6c0e01d
Merge pull request #6 from TechnologyEnhancedLearning/poc-dab-workflow
Phil-NHS Dec 8, 2025
7ad7b61
Merge pull request #7 from TechnologyEnhancedLearning/poc-dab-workflow
Phil-NHS Dec 9, 2025
d30d678
Merge pull request #8 from TechnologyEnhancedLearning/poc-dab-workflow
Phil-NHS Dec 9, 2025
8b2765a
Merge pull request #9 from TechnologyEnhancedLearning/poc-dab-workflow
Phil-NHS Dec 23, 2025
ada18d5
Merge pull request #10 from TechnologyEnhancedLearning/dev-data-team-…
Phil-NHS Dec 23, 2025
58e31df
Merge pull request #11 from TechnologyEnhancedLearning/staging-data-t…
Phil-NHS Dec 23, 2025
4082126
Add pyspark to development requirements
Phil-NHS Dec 23, 2025
e41f649
Rename pytest step and specify unit tests directory
Phil-NHS Dec 23, 2025
72e29aa
Remove session-scoped Spark fixture
Phil-NHS Dec 23, 2025
33d9f2b
Enhance CI workflow with linting and validation steps
Phil-NHS Dec 23, 2025
f42db77
Add manual integration test workflow for dbx
Phil-NHS Dec 23, 2025
bc4c519
Update CI environment to staging
Phil-NHS Dec 23, 2025
1fd3a21
Update ci.yml
Phil-NHS Dec 23, 2025
85740b4
Add GitHub Actions workflow for staging integration tests
Phil-NHS Dec 23, 2025
bf4d41a
Merge pull request #12 from TechnologyEnhancedLearning/main
Phil-NHS Dec 23, 2025
fd44032
Merge pull request #13 from TechnologyEnhancedLearning/staging-data-t…
Phil-NHS Dec 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 53 additions & 41 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,56 +1,68 @@
# We want to run CI processes that can run independently of Databricks as branch rules, so that we don't
# deploy (at cost) code that we already know needs changing —
# such as linting and unit tests for Python, and maybe DAB verify.
# We run these on all pull requests because a hot fix may not have passed
# through staging, for example.
# qqqq check this is up to date
name: CI - Pull Request Checks

# Run CI on all pull requests
# Run CI on all pull requests just in case of hot fixes
on:
pull_request:
branches:
- '**' # all branches
workflow_dispatch:

jobs:
ci_checks:
name: "Linting, Unit Tests, DAB Verify"
lint:
name: "Linting"
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3

- name: Commit lint placeholder
# TODO: Implement commitlint with config from TELBlazor reference
run: |
echo "WARNING: Commit lint step is currently a placeholder."
echo "Reference project: TELBlazor"
# exit 0 <-- Change to 0 if you don't want to block the rest of the pipe yet

pytest:
runs-on: ubuntu-latest

steps:
# Checkout code
- name: Checkout repository
- name: Check out repository
uses: actions/checkout@v4

# Set up Python
- name: Setup Python
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.x"

# Install dependencies used for linting and unit tests
- name: Install dependencies
run: pip install -r requirements-dev.txt

# Run python unit tests
- name: Run Unit Tests
run: pytest tests/unit

# Run python lint
# qqqq on example used flake8 instead
# pyproject.toml will need configuring
- name: Run Linting
run: pylint src

# qqqq to do run commit lint step and put in commit lint config
# see TELBlazor
- name: Commit lint
run: |
echo "Commit lint not implemented"
exit 1

# qqqq to do run version generation step and put in commit lint config
# see TELBlazor
- name: Version Generation Test Run
run: |
echo "Version test run not implemented"
exit 1
python-version: "3.10"

- name: Upgrade pip
run: python -m pip install --upgrade pip

- name: Install project + test deps
run: |
pip install -r requirements-dev.txt
pip install -e .
- name: Run pytest against unit tests (exclude Databricks tests)
run: |
pytest tests/unit-tests -m "not databricks" -v

dab-validate:
name: "Early warning of dab issues"
runs-on: ubuntu-latest
# Just dev: we are not deploying, so we don't need access to the more privileged service principal
# !!!! TODO !!!!! Should be dev, but the dev service principal still needs adding to the GitHub environment
environment: staging
env:
DATABRICKS_HOST: ${{ vars.DBX_HOST }}
DATABRICKS_CLIENT_ID: ${{ vars.DBX_SP_ID }}
DATABRICKS_CLIENT_SECRET: ${{ secrets.DBX_SP_SECRET }}

steps:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Databricks CLI
uses: databricks/setup-cli@main

- name: Validate Bundle
# Am I forced to have a target? This runs after .databrickscfg is set up, so it shouldn't be required
run: databricks bundle validate -t staging
34 changes: 34 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: Just a POC Manual Integration Tests - Staging

# Manual-only trigger: this is a proof-of-concept, not part of the PR pipeline.
on:
  workflow_dispatch:

jobs:
  integration_tests_staging:
    name: "Deploy & Test in Staging"
    runs-on: ubuntu-latest
    # Use the staging GitHub environment so the Service Principal secrets/vars resolve.
    environment: staging

    env:
      # Assumes the DAB bundle is already initialized in the repo; this job would
      # normally run after DAB deployment, unless the bundle can be run straight
      # from the file directory — TODO confirm.
      # Staging Service Principal credentials come from GitHub secrets/vars.
      # NOTE(review): staging is used here only because env vars for dev are not
      # yet configured in GitHub.
      DATABRICKS_HOST: ${{ vars.DBX_HOST }}
      DATABRICKS_CLIENT_ID: ${{ vars.DBX_SP_ID }}
      DATABRICKS_CLIENT_SECRET: ${{ secrets.DBX_SP_SECRET }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Databricks CLI
        uses: databricks/setup-cli@main

      # TODO(review): should this step come after `databricks bundle deploy`, or run
      # against a dev DAB first? Failing here leaves known-bad code deployed, so a
      # local pre-push hook might be a better trigger for this check.
      # The notebook currently runs both as set up, which may be acceptable for now.
      - name: Run Integration Tests
        run: |
          echo "running the integration yml test job integration_test_job.yml"
          databricks bundle run -t staging run_integration_tests
14 changes: 14 additions & 0 deletions .github/workflows/manual-integration-test-poc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# This workflow is so we can tell Databricks (dbx) to run integration tests in the
# Databricks environment, against a DAB target chosen at dispatch time.
name: Trying to trigger dbx integration tests in dbx environ

on:
  workflow_dispatch:
    inputs:
      deploy_target:
        description: 'Which DAB target to deploy to?'
        required: true
        default: 'dev'
        type: choice
        options:
          - dev
          - staging

# FIX(review): the original file declared the dispatch input but had no `jobs`
# section — GitHub Actions requires at least one job, so the workflow was invalid
# and would never appear as runnable. A minimal placeholder job is added that
# surfaces the selected target; replace the echo with the real
# `databricks bundle deploy` / `databricks bundle run` commands when ready.
jobs:
  run_integration_tests:
    name: "Run dbx integration tests"
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Show selected DAB target
        run: |
          echo "Selected DAB target: ${{ inputs.deploy_target }}"
4 changes: 2 additions & 2 deletions .github/workflows/manual-trigger-test-poc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ jobs:
pip install -r requirements-dev.txt
pip install -e .

- name: Run pytest (exclude Databricks tests)
- name: Run pytest against unit tests (exclude Databricks tests)
run: |
pytest -m "not databricks" -v
pytest tests/unit-tests -m "not databricks" -v
3 changes: 2 additions & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pytest
pytest
pyspark
10 changes: 2 additions & 8 deletions tests/unit-tests/transformations/test_date_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,7 @@
# return session


@pytest.fixture(scope="session")
def spark():
    """Provide the already-active Spark session shared by all tests.

    Raises:
        RuntimeError: if no Spark session is active — presumably this means
            we are not running inside a Databricks-provided environment.
    """
    active = SparkSession.getActiveSession()
    if active is None:
        raise RuntimeError("No active Spark session found. Running in Databricks?")
    return active



@pytest.fixture(scope="function")
Expand Down Expand Up @@ -226,4 +220,4 @@ def test_working_days_values_are_reasonable(spark, sample_dataframe_multiple_mon
assert 19 <= working_days <= 23, \
f"Working days should be between 19-23, got {working_days} for {row['start_date']}"

print("✅ All working days values are reasonable")
print("✅ All working days values are reasonable")