From 197171747ee667eda1d3a583cd5dc14f1b433265 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 2 Jun 2026 10:02:15 -0700 Subject: [PATCH 1/2] Validate gpu dispatch flag as bool, fail closed (#2819) The gpu flag selected the GPU backend by truthiness on both the read (open_geotiff) and write (to_geotiff) paths. A non-bool like gpu="False" is truthy, so it routed to the GPU path instead of raising. bool is also an int subclass, so gpu=1 / gpu=0 slipped past int-style guards. Add a shared _validate_gpu_arg helper that rejects non-bool gpu with TypeError before any branch reads it. The read path requires a bool (default False); the write path also accepts None, its auto-detect sentinel. np.bool_ is accepted alongside Python bool, matching the existing nodata validation. Read-path validation lives in _validate_dispatch_kwargs, so the direct dask / gpu / vrt backends inherit the same check. --- xrspatial/geotiff/_validation.py | 29 +++++++ xrspatial/geotiff/_writers/eager.py | 14 +++- .../tests/unit/test_input_validation.py | 80 +++++++++++++++++++ 3 files changed, 120 insertions(+), 3 deletions(-) diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index fe36c7907..db4f47a6b 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -360,6 +360,34 @@ def _validate_overview_level_arg(overview_level) -> None: ) +def _validate_gpu_arg(gpu, *, allow_none: bool = False) -> None: + """Validate the ``gpu`` dispatch flag type. + + ``gpu`` selects the GPU backend by truthiness on both the read + (``open_geotiff`` / ``_validate_dispatch_kwargs``) and write + (``to_geotiff``) paths. Without a type check, a non-bool such as + ``gpu="False"`` is truthy and silently routes to the GPU path + instead of raising. ``bool`` is also a subclass of ``int``, so + ``gpu=1`` / ``gpu=0`` would otherwise sneak through any + ``isinstance(gpu, int)`` style guard; reject them as non-bool. + + ``np.bool_`` is accepted alongside Python ``bool`` (it is not a + ``bool`` subclass but is the obvious numpy equivalent, matching how + ``nodata`` validation treats ``(bool, np.bool_)``). + + ``allow_none=True`` keeps the writer's ``gpu=None`` "auto-detect + from the data" sentinel valid; the read path leaves it False so + only a ``bool`` is accepted. + """ + if allow_none and gpu is None: + return + if not isinstance(gpu, (bool, np.bool_)): + suffix = " or None" if allow_none else "" + raise TypeError( + f"gpu must be a bool{suffix}, got " + f"{type(gpu).__name__}: {gpu!r}") + + def _validate_dispatch_kwargs( *, source, @@ -444,6 +472,7 @@ def _validate_dispatch_kwargs( from ._reader import _MAX_CLOUD_BYTES_SENTINEL _validate_overview_level_arg(overview_level) + _validate_gpu_arg(gpu) if on_gpu_failure is not _ON_GPU_FAILURE_SENTINEL and not gpu: raise ValueError( diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index 0c027d82b..1ea7fd22a 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -34,9 +34,10 @@ from .._nodata import NodataLifecycle as _NL from .._runtime import (GeoTIFFFallbackWarning, _geotiff_strict_mode, _gpu_fallback_warning_message, _resolve_spatial_coords) -from .._validation import (_validate_3d_writer_dims, _validate_no_rotated_affine, - _validate_nodata_arg, _validate_tile_size_arg, - _validate_writer_spatial_shape, validate_write_metadata) +from .._validation import (_validate_3d_writer_dims, _validate_gpu_arg, + _validate_no_rotated_affine, _validate_nodata_arg, + _validate_tile_size_arg, _validate_writer_spatial_shape, + validate_write_metadata) from .._writer import _COG_REQUIRES_TILED_MSG, write from .gpu import write_geotiff_gpu @@ -383,6 +384,13 @@ def to_geotiff(data: xr.DataArray | np.ndarray, raise TypeError( f"nodata must be numeric (int or float), got {nodata!r}") + # Reject non-bool ``gpu`` before the truthiness dispatch below. + # ``use_gpu`` keys the GPU path off ``gpu`` directly, so a value + # like ``gpu="False"`` (truthy) would silently select the GPU + # writer. ``None`` stays valid here: it means "auto-detect from the + # data" on the write path. + _validate_gpu_arg(gpu, allow_none=True) + # tiled=False ignores tile_size for the strip-layout pixel data, so # historically validation only ran when tiled=True. The COG path # (cog=True) still reads tile_size to auto-generate overviews in diff --git a/xrspatial/geotiff/tests/unit/test_input_validation.py b/xrspatial/geotiff/tests/unit/test_input_validation.py index 3a82c195c..c710b0ce5 100644 --- a/xrspatial/geotiff/tests/unit/test_input_validation.py +++ b/xrspatial/geotiff/tests/unit/test_input_validation.py @@ -1767,3 +1767,83 @@ def test_dask_cupy_1xN_raises(self, tmp_path): p = str(tmp_path / "dask_cupy_fail_1xN.tif") with pytest.raises(ValueError, match="(?i)pixel size|transform"): to_geotiff(da_gpu, p) + + +# =========================================================================== +# Section: gpu dispatch flag is fail-closed +# +# ``gpu`` selected the GPU backend by truthiness on both the read +# (``open_geotiff``) and write (``to_geotiff``) paths. A non-bool such +# as ``gpu="False"`` is truthy, so it silently routed to the GPU path +# instead of raising. ``bool`` is also a subclass of ``int``, so +# ``gpu=1`` / ``gpu=0`` slipped past an ``isinstance(gpu, int)`` style +# guard. ``gpu`` must be a real ``bool`` (read path) or ``bool``/``None`` +# (write path, where ``None`` means auto-detect from the data). See +# issue #2819. +# =========================================================================== + + +@pytest.fixture +def gpu_arg_data(): + """A small CPU DataArray for the writer ``gpu`` validation tests.""" + return xr.DataArray( + np.ones((4, 4), dtype="float64"), + dims=("y", "x"), + coords={"y": [0.5, 1.5, 2.5, 3.5], "x": [0.5, 1.5, 2.5, 3.5]}, + attrs={"crs": 4326}, + ) + + +class TestGpuArgFailClosed: + """Non-bool ``gpu`` raises ``TypeError`` before backend dispatch.""" + + # ---- read path: open_geotiff ---- + + @pytest.mark.parametrize("bad", ["False", "True", 1, 0, 1.0, [True]]) + def test_open_geotiff_non_bool_gpu_rejected(self, multiband_tiff_path, bad): + src, _ = multiband_tiff_path + with pytest.raises(TypeError, match="gpu must be a bool"): + open_geotiff(src, gpu=bad) + + @pytest.mark.parametrize("bad", ["False", 1, 0]) + def test_open_geotiff_gpu_rejected_before_io(self, bad): + # Validation runs before the file is opened, so a missing source + # path still surfaces the gpu TypeError rather than an OSError. + with pytest.raises(TypeError, match="gpu must be a bool"): + open_geotiff("does_not_exist_2819.tif", gpu=bad) + + def test_open_geotiff_gpu_none_rejected(self, multiband_tiff_path): + # The read path has no auto-detect sentinel; None is not a bool. + src, _ = multiband_tiff_path + with pytest.raises(TypeError, match="gpu must be a bool"): + open_geotiff(src, gpu=None) + + def test_open_geotiff_gpu_false_still_reads(self, multiband_tiff_path): + src, arr = multiband_tiff_path + da = open_geotiff(src, gpu=False) + assert da.shape == (4, 6, 3) + + # ---- write path: to_geotiff ---- + + @pytest.mark.parametrize("bad", ["False", "True", 1, 0, 1.0, [True]]) + def test_to_geotiff_non_bool_gpu_rejected(self, gpu_arg_data, tmp_path, bad): + p = str(tmp_path / "gpu_arg_2819.tif") + with pytest.raises(TypeError, match="gpu must be a bool"): + to_geotiff(gpu_arg_data, p, gpu=bad) + + def test_to_geotiff_gpu_none_auto_detects(self, gpu_arg_data, tmp_path): + # None is the write-path auto-detect sentinel and must stay valid. + p = str(tmp_path / "gpu_none_2819.tif") + out = to_geotiff(gpu_arg_data, p, gpu=None) + assert out == p + + def test_to_geotiff_gpu_false_writes(self, gpu_arg_data, tmp_path): + p = str(tmp_path / "gpu_false_2819.tif") + out = to_geotiff(gpu_arg_data, p, gpu=False) + assert out == p + + def test_to_geotiff_gpu_numpy_bool_accepted(self, gpu_arg_data, tmp_path): + # np.bool_ is a real bool subclass; accept it like Python bool. + p = str(tmp_path / "gpu_npbool_2819.tif") + out = to_geotiff(gpu_arg_data, p, gpu=np.bool_(False)) + assert out == p From 4cfc329058fe9b9bed1afa600807e200db6a2c2b Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 2 Jun 2026 10:03:55 -0700 Subject: [PATCH 2/2] Address review nit: clarify gpu param vs legacy alias in docstring (#2819) --- xrspatial/geotiff/_validation.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index db4f47a6b..5dfe3b990 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -440,6 +440,12 @@ def _validate_dispatch_kwargs( gpu : bool True when the call routes through the GPU pipeline (either ``open_geotiff(gpu=True)`` or a direct ``read_geotiff_gpu``). + This is the dispatch bool, type-checked via + ``_validate_gpu_arg``. Not to be confused with + ``read_geotiff_gpu``'s own deprecated ``gpu='strict'/'auto'/ + 'loose'`` string parameter (the legacy ``on_gpu_failure`` + alias), which never reaches this helper -- ``read_geotiff_gpu`` + passes a literal ``True`` here. chunks : int, tuple, or None Caller's ``chunks=`` value. ``None`` means eager. overview_level