Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 56 additions & 56 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,56 +1,56 @@
# We want to run CI processes that can run independent of databricks as branch rules so that we dont # deploy at cost code that we already should know needs changing
# such as linting, and unit test for python, maybe dab? verify
# we run these on all pull request because if there is a hot fix it may not have passed through
# staging for example
# qqqq check this is up to date
name: CI - Pull Request Checks
# Run CI on all pull requests
on:
pull_request:
branches:
- '**' # all branches
jobs:
ci_checks:
name: "Linting, Unit Tests, DAB Verify"
runs-on: ubuntu-latest
steps:
# Checkout code
- name: Checkout repository
uses: actions/checkout@v4
# Set up Python
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.x"
# Install dependencies used for linting and unit tests
- name: Install dependencies
run: pip install -r requirements-dev.txt
# Run python unit tests
- name: Run Unit Tests
run: pytest tests/unit
# Run python lint
# qqqq on example used flake8 instead
# pyproject.toml will need configuring
- name: Run Linting
run: pylint src
# qqqq to do run commit lint step and put in commit lint config
# see TELBlazor
- name: Commit lint
run: |
echo "Commit lint not implemented"
exit 1
# qqqq to do run version generation step and put in commit lint config
# see TELBlazor
- name: Version Generation Test Run
run: |
echo "Version test run not implemented"
exit 1
# CI processes that can run independent of Databricks, enforced as branch
# rules, so that we don't deploy (at cost) code that we already know needs
# changing — e.g. linting and Python unit tests; maybe a DAB verify later.
# We run these on all pull requests because a hotfix may not have passed
# through staging, for example.
# TODO(qqqq): check this is up to date.
name: CI - Pull Request Checks

# Run CI on all pull requests
on:
  pull_request:
    branches:
      - '**'  # all branches

jobs:
  ci_checks:
    name: "Linting, Unit Tests, DAB Verify"
    runs-on: ubuntu-latest

    steps:
      # Check out the repository so later steps can see the code
      - name: Checkout repository
        uses: actions/checkout@v4

      # Set up Python (latest available 3.x)
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      # Install dependencies used for linting and unit tests
      - name: Install dependencies
        run: pip install -r requirements-dev.txt

      # Run Python unit tests
      - name: Run Unit Tests
        run: pytest tests/unit

      # Run Python lint
      # TODO(qqqq): the example we followed used flake8 instead;
      # pyproject.toml will need configuring for pylint.
      - name: Run Linting
        run: pylint src

      # TODO(qqqq): implement the commit-lint step and add its config
      # (see TELBlazor). Placeholder deliberately exits 1 so CI fails
      # until it is implemented.
      - name: Commit lint
        run: |
          echo "Commit lint not implemented"
          exit 1

      # TODO(qqqq): implement the version-generation step and add its config
      # (see TELBlazor). Placeholder deliberately exits 1 so CI fails
      # until it is implemented.
      - name: Version Generation Test Run
        run: |
          echo "Version test run not implemented"
          exit 1
29 changes: 29 additions & 0 deletions .github/workflows/manual-trigger-test-poc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Proof of concept: a test run that is triggered manually from the
# GitHub Actions tab (workflow_dispatch), with no automatic triggers.
name: Manual test run (PoC)

on:
  workflow_dispatch:

jobs:
  pytest:
    runs-on: ubuntu-latest

    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      # Pin an exact interpreter version (quoted so 3.10 is not read as 3.1)
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Upgrade pip
        run: python -m pip install --upgrade pip

      # Install dev/test requirements plus the project itself (editable)
      - name: Install project + test deps
        run: |
          pip install -r requirements-dev.txt
          pip install -e .

      # Run the suite, skipping tests marked as requiring Databricks
      - name: Run pytest (exclude Databricks tests)
        run: |
          pytest -m "not databricks" -v
92 changes: 46 additions & 46 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,47 +1,47 @@
# Couldnt find an official gitignore this is AI generated
# -----------------------------
# Databricks / DAB / dbx
# -----------------------------
.databricks/ # local workspace metadata / CLI files
.deploy/ # local deploy cache (dbx/DAB)
.bundle/ # local bundle files (dbx/DAB)
*.log # temporary logs
*.tmp # temporary files
dbx_project.yaml.bak # backup of bundle config
build/
dist/
# -----------------------------
# Python
# -----------------------------
__pycache__/
*.pyc
*.pyo
*.pyd
*.egg-info/
.venv/
env/
pip-selfcheck.json
# -----------------------------
# Jupyter Notebooks
# -----------------------------
.ipynb_checkpoints/
# -----------------------------
# Scratch / experimental folder
# -----------------------------
scratch/** # ignore all files in scratch
!scratch/README.md # except placeholder README.md
# -----------------------------
# IDE / editor
# -----------------------------
.vscode/
.idea/
# -----------------------------
# OS / system
# -----------------------------
.DS_Store
# Couldn't find an official gitignore; this file is AI generated.
# NOTE: git does not strip trailing "# ..." comments from pattern lines, so
# every comment must live on its own line; an inline comment makes the whole
# line an unmatchable pattern. All comments below are therefore full lines.

# -----------------------------
# Databricks / DAB / dbx
# -----------------------------
# Local workspace metadata / CLI files
.databricks/
# Local deploy cache (dbx/DAB)
.deploy/
# Local bundle files (dbx/DAB)
.bundle/
# Temporary logs and files
*.log
*.tmp
# Backup of bundle config
dbx_project.yaml.bak
build/
dist/

# -----------------------------
# Python
# -----------------------------
__pycache__/
*.pyc
*.pyo
*.pyd
*.egg-info/
.venv/
env/
pip-selfcheck.json

# -----------------------------
# Jupyter Notebooks
# -----------------------------
.ipynb_checkpoints/

# -----------------------------
# Scratch / experimental folder
# -----------------------------
# Ignore all files in scratch, except the placeholder README.md
scratch/**
!scratch/README.md

# -----------------------------
# IDE / editor
# -----------------------------
.vscode/
.idea/

# -----------------------------
# OS / system
# -----------------------------
.DS_Store
Thumbs.db
99 changes: 99 additions & 0 deletions conftest.py-comebackto.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Copied from the [DAB repo examples](https://github.com/databricks/bundle-examples/blob/1cf3dba30a897d68e3e74ab17f0a3dff68392f15/default_python/tests/conftest.py)
"""This file configures pytest.

This file is in the root since it can be used for tests in any place in this
project, including tests under resources/.
"""

# Standard-library imports (one per line, alphabetical; the original also
# duplicated `import os` inside the try block below — removed).
import csv
import json
import os
import pathlib
import sys
from contextlib import contextmanager

# Third-party test dependencies are imported inside try/except so that a
# missing environment produces an actionable message instead of a bare
# ImportError traceback.
try:
    import pytest
    from databricks.connect import DatabricksSession
    from databricks.sdk import WorkspaceClient
    from pyspark.sql import SparkSession
except ImportError:
    raise ImportError(
        "Test dependencies not found.\n\nRun tests using 'uv run pytest'. See http://docs.astral.sh/uv to learn more about uv."
    )


@pytest.fixture()
def spark() -> SparkSession:
    """SparkSession fixture backed by Databricks Connect.

    Minimal example:
        def test_uses_spark(spark):
            df = spark.createDataFrame([(1,)], ["x"])
            assert df.count() == 1
    """
    session = DatabricksSession.builder.getOrCreate()
    return session


@pytest.fixture()
def load_fixture(spark: SparkSession):
    """Provide a callable to load JSON or CSV from the fixtures/ directory.

    Example usage:

        def test_using_fixture(load_fixture):
            data = load_fixture("my_data.json")
            assert data.count() >= 1
    """

    def _loader(filename: str):
        # NOTE(review): resolves to <this file's grandparent>/fixtures —
        # confirm parent.parent matches where fixtures/ lives in this repo.
        path = pathlib.Path(__file__).parent.parent / "fixtures" / filename
        suffix = path.suffix.lower()
        if suffix == ".json":
            rows = json.loads(path.read_text())
            return spark.createDataFrame(rows)
        if suffix == ".csv":
            with path.open(newline="") as f:
                rows = list(csv.DictReader(f))
            return spark.createDataFrame(rows)
        # Bug fix: the original f-string had no placeholder and always
        # printed the literal text "(unknown)"; report the filename instead.
        raise ValueError(f"Unsupported fixture type for: {filename}")

    return _loader


def _enable_fallback_compute():
    """Enable serverless compute if no compute is specified.

    Respects any explicit configuration (serverless compute id, cluster id,
    or SPARK_REMOTE) and only falls back when none is present.
    """
    conf = WorkspaceClient().config
    if conf.serverless_compute_id or conf.cluster_id or os.environ.get("SPARK_REMOTE"):
        return

    url = "https://docs.databricks.com/dev-tools/databricks-connect/cluster-config"
    print("☁️ no compute specified, falling back to serverless compute", file=sys.stderr)
    # Bug fix: this second line went to stdout while the first went to
    # stderr; both belong on stderr (this runs under _allow_stderr_output,
    # which exists precisely so stderr reaches the user).
    print(f"   see {url} for manual configuration", file=sys.stderr)

    os.environ["DATABRICKS_SERVERLESS_COMPUTE_ID"] = "auto"


@contextmanager
def _allow_stderr_output(config: pytest.Config):
    """Temporarily disable pytest output capture so stderr reaches the user.

    Falls through to a plain yield when the capture-manager plugin is
    unavailable.
    """
    capture_manager = config.pluginmanager.get_plugin("capturemanager")
    if not capture_manager:
        yield
        return
    with capture_manager.global_and_fixture_disabled():
        yield


def pytest_configure(config: pytest.Config):
    """Configure the pytest session: choose compute and warm up Spark."""
    with _allow_stderr_output(config):
        _enable_fallback_compute()

        # Initialize the Spark session eagerly, so it is available even when
        # SparkSession.builder.getOrCreate() is used. DB Connect 15+ exposes
        # validateSession() to check version compatibility with the remote
        # cluster; use it when present.
        builder = DatabricksSession.builder
        if hasattr(builder, "validateSession"):
            builder = builder.validateSession()
        builder.getOrCreate()
Loading