diff --git a/.github/workflows/ci-nightly.yml b/.github/workflows/ci-nightly.yml index f0a7649a2b..c67f47cf08 100644 --- a/.github/workflows/ci-nightly.yml +++ b/.github/workflows/ci-nightly.yml @@ -16,6 +16,10 @@ concurrency: cancel-in-progress: true on: + push: + branches: + - "main" + - "pull-request/[0-9]+" schedule: # 2:17 AM UTC daily, after the midnight main CI build finishes. # Avoid minute 0 because GitHub documents high scheduled-workflow load @@ -192,6 +196,82 @@ jobs: test-mode: nightly-numba-cuda matrix_filter: 'map(select(.ENV.MODE == "nightly-numba-cuda"))' + # ── numba-cuda-mlir tests ── + + test-numba-cuda-mlir-linux-64: + name: "Nightly numba-cuda-mlir (linux-64)" + if: ${{ github.repository_owner == 'nvidia' }} + needs: find-wheels + permissions: + contents: read + actions: read + secrets: inherit + uses: ./.github/workflows/test-wheel-linux.yml + with: + build-type: nightly + host-platform: linux-64 + build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} + run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} + test-mode: nightly-numba-cuda-mlir + matrix_filter: 'map(select(.ENV.MODE == "nightly-numba-cuda-mlir"))' + + test-numba-cuda-mlir-windows: + name: "Nightly numba-cuda-mlir (win-64)" + if: ${{ github.repository_owner == 'nvidia' }} + needs: find-wheels + permissions: + contents: read + actions: read + secrets: inherit + uses: ./.github/workflows/test-wheel-windows.yml + with: + build-type: nightly + host-platform: win-64 + build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} + run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} + test-mode: nightly-numba-cuda-mlir + matrix_filter: 'map(select(.ENV.MODE == "nightly-numba-cuda-mlir"))' + + # ── Released cuda-core against main pathfinder/bindings ── + + test-cuda-core-linux-64: + name: "Nightly cuda-core (linux-64)" + if: ${{ github.repository_owner == 'nvidia' }} + needs: find-wheels + 
permissions: + contents: read + actions: read + secrets: inherit + uses: ./.github/workflows/test-wheel-linux.yml + with: + build-type: nightly + host-platform: linux-64 + build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} + run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} + test-mode: nightly-cuda-core + matrix_filter: 'map(select(.ENV.MODE == "nightly-cuda-core"))' + + test-cuda-core-windows: + name: "Nightly cuda-core (win-64)" + if: ${{ github.repository_owner == 'nvidia' }} + needs: find-wheels + permissions: + contents: read + actions: read + secrets: inherit + uses: ./.github/workflows/test-wheel-windows.yml + with: + build-type: nightly + host-platform: win-64 + build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }} + run-id: ${{ needs.find-wheels.outputs.RUN_ID }} + sha: ${{ needs.find-wheels.outputs.HEAD_SHA }} + test-mode: nightly-cuda-core + matrix_filter: 'map(select(.ENV.MODE == "nightly-cuda-core"))' + # ── Standard tests on nightly-only runners ── test-standard-linux-aarch64: @@ -226,6 +306,10 @@ jobs: - test-numba-cuda-linux-64 - test-numba-cuda-linux-aarch64 - test-numba-cuda-windows + - test-numba-cuda-mlir-linux-64 + - test-numba-cuda-mlir-windows + - test-cuda-core-linux-64 + - test-cuda-core-windows - test-standard-linux-aarch64 steps: - name: Exit @@ -250,6 +334,14 @@ jobs: needs.test-numba-cuda-linux-aarch64.result == 'failure' || needs.test-numba-cuda-windows.result == 'cancelled' || needs.test-numba-cuda-windows.result == 'failure' || + needs.test-numba-cuda-mlir-linux-64.result == 'cancelled' || + needs.test-numba-cuda-mlir-linux-64.result == 'failure' || + needs.test-numba-cuda-mlir-windows.result == 'cancelled' || + needs.test-numba-cuda-mlir-windows.result == 'failure' || + needs.test-cuda-core-linux-64.result == 'cancelled' || + needs.test-cuda-core-linux-64.result == 'failure' || + needs.test-cuda-core-windows.result == 'cancelled' || + 
needs.test-cuda-core-windows.result == 'failure' || needs.test-standard-linux-aarch64.result == 'cancelled' || needs.test-standard-linux-aarch64.result == 'failure' }}; then exit 1 diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index 982d4d1c49..16af4b15d3 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -37,8 +37,9 @@ on: default: '' test-mode: description: > - Test mode: 'standard' (default), 'nightly-pytorch', or - 'nightly-numba-cuda'. + Test mode: 'standard' (default), 'nightly-pytorch', + 'nightly-numba-cuda', 'nightly-numba-cuda-mlir', or + 'nightly-cuda-core'. type: string default: 'standard' sha: @@ -409,6 +410,20 @@ jobs: LOCAL_CTK: ${{ matrix.LOCAL_CTK }} run: run-tests nightly-numba-cuda + - name: Install cuda-python wheels + numba-cuda-mlir + if: ${{ inputs.test-mode == 'nightly-numba-cuda-mlir' }} + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + run: run-tests nightly-numba-cuda-mlir + + - name: Install main pathfinder/bindings + released cuda-core + if: ${{ inputs.test-mode == 'nightly-cuda-core' }} + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + run: run-tests nightly-cuda-core + # ── Nightly: run tests ── - name: Run PyTorch interop tests if: ${{ inputs.test-mode == 'nightly-pytorch' }} @@ -420,3 +435,98 @@ jobs: - name: Run numba-cuda tests if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} run: python -m numba.runtests numba.cuda.tests + + - name: Checkout numba-cuda-mlir tests at matching tag + if: ${{ inputs.test-mode == 'nightly-numba-cuda-mlir' && env.NUMBA_CUDA_MLIR_VER != '' }} + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + repository: NVIDIA/numba-cuda-mlir + ref: v${{ env.NUMBA_CUDA_MLIR_VER }} + path: numba-cuda-mlir-released + + - name: Run numba-cuda-mlir tests + if: ${{ inputs.test-mode == 'nightly-numba-cuda-mlir' && 
env.NUMBA_CUDA_MLIR_VER != '' }} + run: | + pushd numba-cuda-mlir-released + # Install this tag's test deps (pytest + plugins + ml-dtypes + ...). + pip install --upgrade "pip>=25.1" + pip install --group test + # Skip tests/benchmarks/ and tests/doc_examples/ — they import the + # numba package at collection time, which cuSIMT intentionally does + # not depend on. See NVIDIA/numba-cuda-mlir#136. + # + # Version-gated deselects: when a newer numba-cuda-mlir release + # ships with the referenced fix, the guard evaluates false and the + # tests get run automatically. If they still fail on the newer + # version we hear about it loudly (rather than silently masking). + DESELECTS=() + if python -c "from packaging.version import Version; import sys; sys.exit(0 if Version('${NUMBA_CUDA_MLIR_VER}') <= Version('0.4.0') else 1)"; then + # NVIDIA/numba-cuda-mlir#135: under serial pytest, an xfailed test in + # test_nrt_comprehensive.py leaves numba_cuda_mlir.cuda.cudadrv in a + # contaminated state, which breaks any later test that + # touches cuda.cudadrv.driver. Upstream CI hides it via + # `-n auto --dist loadscope`. Which specific tests fail depends + # on collection order (we saw different subsets on linux-64 vs + # win-64 across runs), so we deselect the union of all tests + # #135 lists as vulnerable + test_fortran_contiguous (observed + # to hit the same contamination in our runs). + # + # test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn: + # subprocess-invokes `cuobjdump`, not on PATH in the base + # ubuntu:24.04 container. (Linux-only; Windows runners ship + # cuobjdump with the local CTK. No upstream fix yet — pending + # a skip-guard bug to be filed against NVIDIA/numba-cuda-mlir.) 
+ DESELECTS+=( + --deselect 'tests/numba_cuda_tests/cudadrv/test_cuda_array_slicing.py::CudaArraySetting::test_no_sync_default_stream' + --deselect 'tests/numba_cuda_tests/cudadrv/test_cuda_array_slicing.py::CudaArraySetting::test_no_sync_supplied_stream' + --deselect 'tests/numba_cuda_tests/cudadrv/test_cuda_array_slicing.py::CudaArraySetting::test_sync' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_consume_no_sync' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_consume_sync' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_launch_no_sync' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_launch_sync' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_launch_sync_two_streams' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_fortran_contiguous' + --deselect 'tests/numba_cuda_tests/cudadrv/test_nvjitlink.py::TestLinkerDumpAssembly::test_nvjitlink_jit_with_linkable_code_lto_dump_assembly_warn' + ) + fi + pytest -rxXs -v --durations=0 \ + --ignore=tests/benchmarks \ + --ignore=tests/doc_examples \ + "${DESELECTS[@]}" \ + tests/ + popd + + - name: Checkout released cuda-core tests at matching tag + if: ${{ inputs.test-mode == 'nightly-cuda-core' && env.CUDA_CORE_RELEASED_VER != '' }} + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + ref: cuda-core-v${{ env.CUDA_CORE_RELEASED_VER }} + path: cuda-core-released + + - name: Run released cuda-core tests + if: ${{ inputs.test-mode == 'nightly-cuda-core' && env.CUDA_CORE_RELEASED_VER != '' }} + run: | + pushd cuda-core-released/cuda_core + # Install the released tag's test group so we exercise the exact deps + # that cuda-core version shipped with. 
+ pip install --upgrade "pip>=25.1" + pip install --group "${CUDA_CORE_TEST_GROUP}" + # Cap pytest below 9.1: released cuda-core <=1.0.1 has parametrize + # patterns that pytest 9.1 rejects; the main-side fix (#2212) has + # not yet shipped in a cuda-core release. + pip install "pytest<9.1" + # Version-gated deselect: drops automatically when a newer + # cuda-core release with the wrapper-mapping update ships. + DESELECTS=() + if python -c "from packaging.version import Version; import sys; sys.exit(0 if Version('${CUDA_CORE_RELEASED_VER}') <= Version('1.0.1') else 1)"; then + # NvlinkVersion: v1.0.1's wrapper mapping predates + # NvlinkVersion.VERSION_6_0 which main cuda-bindings adds. + # Expected drift on this mode until released cuda-core catches up. + DESELECTS+=( + --deselect 'tests/test_enum_coverage.py::test_wrapper_covers_all_binding_members[NvlinkVersion]' + ) + fi + pytest -rxXs -v --durations=0 --randomly-dont-reorganize \ + "${DESELECTS[@]}" \ + tests/ + popd diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 2ad263297e..0d9dc78d5d 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -37,8 +37,9 @@ on: default: '' test-mode: description: > - Test mode: 'standard' (default), 'nightly-pytorch', or - 'nightly-numba-cuda'. + Test mode: 'standard' (default), 'nightly-pytorch', + 'nightly-numba-cuda', 'nightly-numba-cuda-mlir', or + 'nightly-cuda-core'. 
type: string default: 'standard' sha: @@ -388,6 +389,22 @@ jobs: shell: bash --noprofile --norc -xeuo pipefail {0} run: run-tests nightly-numba-cuda + - name: Install cuda-python wheels + numba-cuda-mlir + if: ${{ inputs.test-mode == 'nightly-numba-cuda-mlir' }} + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: run-tests nightly-numba-cuda-mlir + + - name: Install main pathfinder/bindings + released cuda-core + if: ${{ inputs.test-mode == 'nightly-cuda-core' }} + env: + CUDA_VER: ${{ matrix.CUDA_VER }} + LOCAL_CTK: ${{ matrix.LOCAL_CTK }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: run-tests nightly-cuda-core + # ── Nightly: run tests ── - name: Run PyTorch interop tests if: ${{ inputs.test-mode == 'nightly-pytorch' }} @@ -401,3 +418,85 @@ jobs: if: ${{ inputs.test-mode == 'nightly-numba-cuda' }} shell: bash --noprofile --norc -xeuo pipefail {0} run: python -m numba.runtests numba.cuda.tests + + - name: Checkout numba-cuda-mlir tests at matching tag + if: ${{ inputs.test-mode == 'nightly-numba-cuda-mlir' && env.NUMBA_CUDA_MLIR_VER != '' }} + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + repository: NVIDIA/numba-cuda-mlir + ref: v${{ env.NUMBA_CUDA_MLIR_VER }} + path: numba-cuda-mlir-released + + - name: Run numba-cuda-mlir tests + if: ${{ inputs.test-mode == 'nightly-numba-cuda-mlir' && env.NUMBA_CUDA_MLIR_VER != '' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + pushd numba-cuda-mlir-released + pip install --upgrade "pip>=25.1" + pip install --group test + # Version-gated deselects — dropped automatically when newer + # cuSIMT release ships. See linux step for full rationale. + # NVIDIA/numba-cuda-mlir#135 poisons a subset of tests that + # varies across runs based on collection order, so we deselect + # the full union rather than trying to enumerate what happened + # to fail on the most recent nightly. 
+ DESELECTS=() + if python -c "from packaging.version import Version; import sys; sys.exit(0 if Version('${NUMBA_CUDA_MLIR_VER}') <= Version('0.4.0') else 1)"; then + DESELECTS+=( + --deselect 'tests/numba_cuda_tests/cudadrv/test_cuda_array_slicing.py::CudaArraySetting::test_no_sync_default_stream' + --deselect 'tests/numba_cuda_tests/cudadrv/test_cuda_array_slicing.py::CudaArraySetting::test_no_sync_supplied_stream' + --deselect 'tests/numba_cuda_tests/cudadrv/test_cuda_array_slicing.py::CudaArraySetting::test_sync' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_consume_no_sync' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_consume_sync' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_launch_no_sync' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_launch_sync' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_launch_sync_two_streams' + --deselect 'tests/numba_cuda_tests/cudapy/test_cuda_array_interface.py::TestCudaArrayInterface::test_fortran_contiguous' + ) + fi + pytest -rxXs -v --durations=0 \ + --ignore=tests/benchmarks \ + --ignore=tests/doc_examples \ + "${DESELECTS[@]}" \ + tests/ + popd + + - name: Checkout released cuda-core tests at matching tag + if: ${{ inputs.test-mode == 'nightly-cuda-core' && env.CUDA_CORE_RELEASED_VER != '' }} + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + ref: cuda-core-v${{ env.CUDA_CORE_RELEASED_VER }} + path: cuda-core-released + + - name: Run released cuda-core tests + if: ${{ inputs.test-mode == 'nightly-cuda-core' && env.CUDA_CORE_RELEASED_VER != '' }} + shell: bash --noprofile --norc -xeuo pipefail {0} + run: | + pushd cuda-core-released/cuda_core + pip install --upgrade "pip>=25.1" + pip install --group 
"${CUDA_CORE_TEST_GROUP}" + # Cap pytest below 9.1 — released cuda-core <=1.0.1 has parametrize + # patterns that pytest 9.1 rejects (see #2212). + pip install "pytest<9.1" + # Version-gated deselects — dropped automatically when a newer + # cuda-core release ships. See linux step for full rationale on + # NvlinkVersion. The Windows-only deselects are: + # - test_rlcompleter_patch: env-dependent expectation that + # passes on Linux, fails on Windows MCDM. + # - test_non_managed_resources_report_not_managed[pinned]: the same + # MCDM mempool OOM that v1.0.1 already xfails in + # test_pinned_memory_resource_initialization (TODO(#9999)); + # main fixed the parametrized case via #2139 but v1.0.1 lacks + # the fix. + DESELECTS=() + if python -c "from packaging.version import Version; import sys; sys.exit(0 if Version('${CUDA_CORE_RELEASED_VER}') <= Version('1.0.1') else 1)"; then + DESELECTS+=( + --deselect 'tests/test_enum_coverage.py::test_wrapper_covers_all_binding_members[NvlinkVersion]' + --deselect 'tests/test_rlcompleter_patch.py::test_opt_out_env_var_disables_patch_even_when_interactive' + --deselect 'tests/test_memory.py::test_non_managed_resources_report_not_managed[pinned]' + ) + fi + pytest -rxXs -v --durations=0 --randomly-dont-reorganize \ + "${DESELECTS[@]}" \ + tests/ + popd diff --git a/ci/test-matrix.yml b/ci/test-matrix.yml index c9eafd4f52..cd0944f200 100644 --- a/ci/test-matrix.yml +++ b/ci/test-matrix.yml @@ -29,7 +29,7 @@ # subsequent steps (including the cuda.bindings and cuda.core test # steps). Nightly rows also use ENV.MODE as a matrix-filter tag (see # ci-nightly.yml). 
Examples: -# ENV: { CUDA_PYTHON_PER_THREAD_DEFAULT_STREAM: '1' } +# ENV: { CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM: '1' } # ENV: { MODE: 'nightly-pytorch', TORCH_VER: '2.12.1', TORCH_CUDA: 'cu126' } linux: @@ -41,7 +41,7 @@ linux: - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.11', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { CUDA_PYTHON_PER_THREAD_DEFAULT_STREAM: '1' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM: '1' } } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' } - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest' } @@ -96,6 +96,11 @@ linux: - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '580.65.06', ENV: { MODE: 'nightly-numba-cuda' } } - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } - { ARCH: 'arm64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda' } } + # nightly-numba-cuda-mlir (MLIR backend, linux-64 only) + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 
'nightly-numba-cuda-mlir' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-numba-cuda-mlir' } } + # nightly-cuda-core (released cuda-core from PyPI against main pathfinder/bindings) + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', ENV: { MODE: 'nightly-cuda-core' } } # nightly-standard (arm64 nightly-only runners — per runner team request) # TODO: gh200 row disabled — currently hangs on stream-ordered memory # allocator (cudaMallocAsync); runner pool needs fixing first. @@ -117,7 +122,7 @@ windows: - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '12.9.1', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM', ENV: { CUDA_PYTHON_PER_THREAD_DEFAULT_STREAM: '1' } } + - { ARCH: 'amd64', PY_VER: '3.13', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM', ENV: { CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM: '1' } } - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'v100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' } - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.0.2', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '1', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' } @@ -136,3 +141,8 @@ windows: # nightly-numba-cuda - { ARCH: 'amd64', PY_VER: 
'3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-numba-cuda' } } - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: '596.36', DRIVER_MODE: 'TCC', ENV: { MODE: 'nightly-numba-cuda' } } + # nightly-numba-cuda-mlir (MLIR backend, win-64) + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM', ENV: { MODE: 'nightly-numba-cuda-mlir' } } + - { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'rtxpro6000', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM', ENV: { MODE: 'nightly-numba-cuda-mlir' } } + # nightly-cuda-core (released cuda-core from PyPI against main pathfinder/bindings) + - { ARCH: 'amd64', PY_VER: '3.14', CUDA_VER: '13.3.0', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM', ENV: { MODE: 'nightly-cuda-core' } } diff --git a/ci/tools/run-tests b/ci/tools/run-tests index 1ca54ba820..c5c0cc1233 100755 --- a/ci/tools/run-tests +++ b/ci/tools/run-tests @@ -93,10 +93,9 @@ elif [[ "${test_module}" == "core" || "${test_module}" == nightly-* ]]; then PATHFINDER_WHL=($(realpath ./cuda_pathfinder/*.whl)) fi - # pushd so --group reads test dependency groups from cuda_core/pyproject.toml. - pushd ./cuda_core - if [[ "${test_module}" == "core" ]]; then + # pushd so --group reads test dependency groups from cuda_core/pyproject.toml. 
+ pushd ./cuda_core echo "Installing bindings (source: ${BINDINGS_SOURCE})" pip install "${BINDINGS_ARGS[@]}" echo "Installing core wheel" @@ -112,10 +111,28 @@ elif [[ "${test_module}" == "core" || "${test_module}" == nightly-* ]]; then if [[ "${SKIP_CYTHON_TEST}" == 0 ]]; then ${SANITIZER_CMD} pytest -rxXs -v --durations=0 --randomly-dont-reorganize tests/cython fi + popd + elif [[ "${test_module}" == "nightly-cuda-core" ]]; then + # Test the *released* cuda-core (from PyPI) against *main*-built pathfinder + # and cuda-bindings. The workflow follows up with an actions/checkout of the + # matching cuda-core-v${CUDA_CORE_RELEASED_VER} tag so the released version's own test suite + # (which is not shipped in the wheel) can be exercised. + echo "Installing pathfinder + bindings from main + released cuda-core from PyPI" + pip install "${PATHFINDER_WHL[@]}" "${BINDINGS_ARGS[@]}" "cuda-core[cu${TEST_CUDA_MAJOR}]" + + released_ver=$(pip show cuda-core | awk '/^Version:/{print $2}') + if [[ -n "${GITHUB_ENV:-}" ]]; then + echo "CUDA_CORE_RELEASED_VER=${released_ver}" >> "${GITHUB_ENV}" + echo "CUDA_CORE_TEST_GROUP=test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}" >> "${GITHUB_ENV}" + fi + echo "Installed packages before released cuda-core tests:" + pip list else - # Nightly optional-dependency testing. - # Install ALL wheels (pathfinder + bindings + core) and the optional dep - # in a single pip call so pip resolves version constraints in one shot. + # Nightly optional-dependency testing: nightly-pytorch, nightly-numba-cuda, + # nightly-numba-cuda-mlir. Install ALL cuda-python wheels (pathfinder + + # bindings + core) and the optional dep in a single pip call so pip resolves + # version constraints in one shot. 
+ pushd ./cuda_core PIP_ARGS=( "${PATHFINDER_WHL[@]}" "${BINDINGS_ARGS[@]}" @@ -144,12 +161,26 @@ elif [[ "${test_module}" == "core" || "${test_module}" == nightly-* ]]; then "cupy-cuda${TEST_CUDA_MAJOR}x" psutil cffi pytest-xdist pytest-benchmark filecheck ml_dtypes statistics ) + elif [[ "${test_module}" == "nightly-numba-cuda-mlir" ]]; then + echo "Installing pathfinder + bindings + core + numba-cuda-mlir" + # numpy<2.5: numba-cuda-mlir 0.4.0 registers np.row_stack, which was + # removed in NumPy 2.5. See NVIDIA/numba-cuda-mlir#154. + PIP_ARGS+=("numba-cuda-mlir[cu${TEST_CUDA_MAJOR}]" "numpy<2.5") fi pip install "${PIP_ARGS[@]}" echo "Nightly install complete — installed packages:" pip list + popd + + if [[ "${test_module}" == "nightly-numba-cuda-mlir" ]]; then + # Expose the installed numba-cuda-mlir version so the workflow can + # actions/checkout the matching v${NUMBA_CUDA_MLIR_VER} tag from NVIDIA/numba-cuda-mlir + # (the wheel does not ship test_*.py files). + installed_ver=$(pip show numba-cuda-mlir | awk '/^Version:/{print $2}') + if [[ -n "${GITHUB_ENV:-}" ]]; then + echo "NUMBA_CUDA_MLIR_VER=${installed_ver}" >> "${GITHUB_ENV}" + fi + fi fi - - popd fi