Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 56 additions & 56 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,56 +1,56 @@
# We want to run CI processes that can run independent of databricks as branch rules so that we dont # deploy at cost code that we already should know needs changing
# such as linting, and unit test for python, maybe dab? verify
# we run these on all pull request because if there is a hot fix it may not have passed through
# staging for example
# qqqq check this is up to date
name: CI - Pull Request Checks
# Run CI on all pull requests
on:
pull_request:
branches:
- '**' # all branches
jobs:
ci_checks:
name: "Linting, Unit Tests, DAB Verify"
runs-on: ubuntu-latest
steps:
# Checkout code
- name: Checkout repository
uses: actions/checkout@v4
# Set up Python
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.x"
# Install dependencies used for linting and unit tests
- name: Install dependencies
run: pip install -r requirements-dev.txt
# Run python unit tests
- name: Run Unit Tests
run: pytest tests/unit
# Run python lint
# qqqq on example used flake8 instead
# pyproject.toml will need configuring
- name: Run Linting
run: pylint src
# qqqq to do run commit lint step and put in commit lint config
# see TELBlazor
- name: Commit lint
run: |
echo "Commit lint not implemented"
exit 1
# qqqq to do run version generation step and put in commit lint config
# see TELBlazor
- name: Version Generation Test Run
run: |
echo "Version test run not implemented"
exit 1
# CI processes that can run independent of Databricks, enforced as branch
# rules, so that we don't deploy (at cost) code that we already know needs
# changing — e.g. linting and Python unit tests; maybe a DAB verify later.
# We run these on all pull requests because a hotfix may not have passed
# through staging, for example.
# TODO(qqqq): check this is up to date.
name: CI - Pull Request Checks

# Run CI on all pull requests
on:
  pull_request:
    branches:
      - '**'  # all branches

jobs:
  ci_checks:
    name: "Linting, Unit Tests, DAB Verify"
    runs-on: ubuntu-latest

    steps:
      # Check out the repository so later steps can see the code
      - name: Checkout repository
        uses: actions/checkout@v4

      # Set up Python (latest available 3.x)
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      # Install dependencies used for linting and unit tests
      - name: Install dependencies
        run: pip install -r requirements-dev.txt

      # Run Python unit tests
      - name: Run Unit Tests
        run: pytest tests/unit

      # Run Python lint
      # TODO(qqqq): the example we followed used flake8 instead;
      # pyproject.toml will need configuring for pylint.
      - name: Run Linting
        run: pylint src

      # TODO(qqqq): implement the commit-lint step and add its config
      # (see TELBlazor). Placeholder deliberately exits 1 so CI fails
      # until it is implemented.
      - name: Commit lint
        run: |
          echo "Commit lint not implemented"
          exit 1

      # TODO(qqqq): implement the version-generation step and add its config
      # (see TELBlazor). Placeholder deliberately exits 1 so CI fails
      # until it is implemented.
      - name: Version Generation Test Run
        run: |
          echo "Version test run not implemented"
          exit 1
29 changes: 29 additions & 0 deletions .github/workflows/manual-trigger-test-poc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Proof of concept: a test run that is triggered manually from the
# GitHub Actions tab (workflow_dispatch), with no automatic triggers.
name: Manual test run (PoC)

on:
  workflow_dispatch:

jobs:
  pytest:
    runs-on: ubuntu-latest

    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      # Pin an exact interpreter version (quoted so 3.10 is not read as 3.1)
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Upgrade pip
        run: python -m pip install --upgrade pip

      # Install dev/test requirements plus the project itself (editable)
      - name: Install project + test deps
        run: |
          pip install -r requirements-dev.txt
          pip install -e .

      # Run the suite, skipping tests marked as requiring Databricks
      - name: Run pytest (exclude Databricks tests)
        run: |
          pytest -m "not databricks" -v
92 changes: 46 additions & 46 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,47 +1,47 @@
# Couldnt find an official gitignore this is AI generated
# -----------------------------
# Databricks / DAB / dbx
# -----------------------------
.databricks/ # local workspace metadata / CLI files
.deploy/ # local deploy cache (dbx/DAB)
.bundle/ # local bundle files (dbx/DAB)
*.log # temporary logs
*.tmp # temporary files
dbx_project.yaml.bak # backup of bundle config
build/
dist/
# -----------------------------
# Python
# -----------------------------
__pycache__/
*.pyc
*.pyo
*.pyd
*.egg-info/
.venv/
env/
pip-selfcheck.json
# -----------------------------
# Jupyter Notebooks
# -----------------------------
.ipynb_checkpoints/
# -----------------------------
# Scratch / experimental folder
# -----------------------------
scratch/** # ignore all files in scratch
!scratch/README.md # except placeholder README.md
# -----------------------------
# IDE / editor
# -----------------------------
.vscode/
.idea/
# -----------------------------
# OS / system
# -----------------------------
.DS_Store
# Couldn't find an official gitignore; this file is AI generated.
# NOTE: git does not strip trailing "# ..." comments from pattern lines, so
# every comment must live on its own line; an inline comment makes the whole
# line an unmatchable pattern. All comments below are therefore full lines.

# -----------------------------
# Databricks / DAB / dbx
# -----------------------------
# Local workspace metadata / CLI files
.databricks/
# Local deploy cache (dbx/DAB)
.deploy/
# Local bundle files (dbx/DAB)
.bundle/
# Temporary logs and files
*.log
*.tmp
# Backup of bundle config
dbx_project.yaml.bak
build/
dist/

# -----------------------------
# Python
# -----------------------------
__pycache__/
*.pyc
*.pyo
*.pyd
*.egg-info/
.venv/
env/
pip-selfcheck.json

# -----------------------------
# Jupyter Notebooks
# -----------------------------
.ipynb_checkpoints/

# -----------------------------
# Scratch / experimental folder
# -----------------------------
# Ignore all files in scratch, except the placeholder README.md
scratch/**
!scratch/README.md

# -----------------------------
# IDE / editor
# -----------------------------
.vscode/
.idea/

# -----------------------------
# OS / system
# -----------------------------
.DS_Store
Thumbs.db
99 changes: 99 additions & 0 deletions conftest.py-comebackto.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Copied from the [DAB repo examples](https://github.com/databricks/bundle-examples/blob/1cf3dba30a897d68e3e74ab17f0a3dff68392f15/default_python/tests/conftest.py)
"""This file configures pytest.

This file is in the root since it can be used for tests in any place in this
project, including tests under resources/.
"""

# Standard-library imports (one per line, alphabetical; the original also
# duplicated `import os` inside the try block below — removed).
import csv
import json
import os
import pathlib
import sys
from contextlib import contextmanager

# Third-party test dependencies are imported inside try/except so that a
# missing environment produces an actionable message instead of a bare
# ImportError traceback.
try:
    import pytest
    from databricks.connect import DatabricksSession
    from databricks.sdk import WorkspaceClient
    from pyspark.sql import SparkSession
except ImportError:
    raise ImportError(
        "Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn more about uv."
    )


@pytest.fixture()
def spark() -> SparkSession:
    """SparkSession fixture backed by Databricks Connect.

    Minimal example:
        def test_uses_spark(spark):
            df = spark.createDataFrame([(1,)], ["x"])
            assert df.count() == 1
    """
    session = DatabricksSession.builder.getOrCreate()
    return session


@pytest.fixture()
def load_fixture(spark: SparkSession):
    """Provide a callable to load JSON or CSV from the fixtures/ directory.

    Example usage:

        def test_using_fixture(load_fixture):
            data = load_fixture("my_data.json")
            assert data.count() >= 1
    """

    def _loader(filename: str):
        # NOTE(review): resolves to <this file's grandparent>/fixtures —
        # confirm parent.parent matches where fixtures/ lives in this repo.
        path = pathlib.Path(__file__).parent.parent / "fixtures" / filename
        suffix = path.suffix.lower()
        if suffix == ".json":
            rows = json.loads(path.read_text())
            return spark.createDataFrame(rows)
        if suffix == ".csv":
            with path.open(newline="") as f:
                rows = list(csv.DictReader(f))
            return spark.createDataFrame(rows)
        # Bug fix: the original f-string had no placeholder and always
        # printed the literal text "(unknown)"; report the filename instead.
        raise ValueError(f"Unsupported fixture type for: {filename}")

    return _loader


def _enable_fallback_compute():
    """Enable serverless compute if no compute is specified.

    Respects any explicit configuration (serverless compute id, cluster id,
    or SPARK_REMOTE) and only falls back when none is present.
    """
    conf = WorkspaceClient().config
    if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"):
        return

    url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config"
    print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr)
    # Bug fix: this second line went to stdout while the first went to
    # stderr; both belong on stderr (this runs under _allow_stderr_output,
    # which exists precisely so stderr reaches the user).
    print(f"   see {url} for manual configuration", file=sys.stderr)

    os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto"


@contextmanager
def _allow_stderr_output(config: pytest.Config):
    """Temporarily disable pytest output capture so stderr reaches the user.

    Falls through to a plain yield when the capture-manager plugin is
    unavailable.
    """
    capture_manager = config.pluginmanager.get_plugin("capturemanager")
    if not capture_manager:
        yield
        return
    with capture_manager.global_and_fixture_disabled():
        yield


def pytest_configure(config: pytest.Config):
    """Configure the pytest session: choose compute and warm up Spark."""
    with _allow_stderr_output(config):
        _enable_fallback_compute()

        # Initialize the Spark session eagerly, so it is available even when
        # SparkSession.builder.getOrCreate() is used. DB Connect 15+ exposes
        # validateSession() to check version compatibility with the remote
        # cluster; use it when present.
        builder = DatabricksSession.builder
        if hasattr(builder, "validateSession"):
            builder = builder.validateSession()
        builder.getOrCreate()
Loading