diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index d5b5e7b9e..d19564adc 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -232,7 +232,8 @@ def _validate_dtype_cast(source_dtype, target_dtype): f"Cast explicitly after reading if you really want this.") -def _validate_tile_size(tile_size) -> None: +def _validate_tile_size(tile_size, *, require_multiple_of_16: bool = True + ) -> None: """Validate ``tile_size`` for the tiled GeoTIFF writers. Shared by ``to_geotiff`` (when ``tiled=True``) and @@ -246,6 +247,15 @@ def _validate_tile_size(tile_size) -> None: for broad interoperability with libtiff / GDAL strict readers; a value like 17 would otherwise round-trip through the in-repo reader but be rejected elsewhere. + + ``require_multiple_of_16`` defaults to ``True`` for the public + boundary (``to_geotiff`` / ``_write_geotiff_gpu``), which enforces + spec interop. The array-level private writers (``_write`` / + ``_write_streaming``) pass ``False``: they still reject the + crash-inducing cases (non-int, non-positive) but allow the small + spec-noncompliant tile sizes that the in-repo reader round-trips + and that the internal tests rely on. The multiple-of-16 policy + stays at the public boundary (issue #2997). """ if not isinstance(tile_size, (int, np.integer)) or isinstance( tile_size, bool): @@ -255,7 +265,7 @@ def _validate_tile_size(tile_size) -> None: if tile_size <= 0: raise ValueError( f"tile_size must be a positive int, got tile_size={tile_size}.") - if tile_size % 16 != 0: + if require_multiple_of_16 and tile_size % 16 != 0: lower = (int(tile_size) // 16) * 16 upper = lower + 16 # ``lower`` is 0 for tile_size < 16; suppress it from the hint @@ -324,14 +334,19 @@ def _validate_chunks_arg(chunks, *, allow_none=False): f"(type {type(chunks).__name__}).") -def _validate_tile_size_arg(tile_size): +def _validate_tile_size_arg(tile_size, *, + require_multiple_of_16: bool = True): """Validate the ``tile_size`` kwarg for the tiled writer entry points. Wrapper kept for backwards internal compatibility; delegates to ``_validate_tile_size`` so to_geotiff/_write_geotiff_gpu share one validation path (positive int + multiple-of-16 for tiled output). + Pass ``require_multiple_of_16=False`` from the array-level private + writers, which enforce only the crash-inducing positivity/type rules + (issue #2997). """ - _validate_tile_size(tile_size) + _validate_tile_size( + tile_size, require_multiple_of_16=require_multiple_of_16) def _validate_overview_level_arg(overview_level) -> None: diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 3fef0a04b..910055eaf 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -421,23 +421,21 @@ def _write(data: np.ndarray, path: str, *, entry_point="_write", ) - # Defense in depth for the COG auto-overview hang. - # ``to_geotiff`` already rejects non-positive tile_size when either - # tiled or cog is true, but ``_write`` is a public-ish array-level - # entry point reached from the GPU CPU-fallback path and downstream - # code that imports it directly. Without the gate, a non-positive - # tile_size with ``cog=True`` hits ``ZeroDivisionError`` in - # ``_write_tiled`` (tiled=True path) or the infinite auto-overview - # halving loop (tiled=False path). Convert both to a typed - # ValueError up front so callers see one actionable failure mode - # instead of two divergent ones. - if cog and (not isinstance(tile_size, (int, np.integer)) - or isinstance(tile_size, bool) - or tile_size <= 0): - raise ValueError( - f"tile_size must be a positive int for cog=True (consumed by " - f"tile encoding and auto-overview generation), got " - f"tile_size={tile_size!r}.") + # Push down the tile_size positivity/type contract. ``_write`` is a + # public-ish array-level entry point reached from the GPU + # CPU-fallback path and downstream code that imports it directly, + # so the gate has to run here. Without it a non-positive tile_size + # hits ``ZeroDivisionError`` in ``_write_tiled`` (tiled=True) or the + # auto-overview halving loop (cog=True). Validate whenever either + # path will consume tile_size, matching the public wrapper's + # ``tiled or cog`` gate. The multiple-of-16 spec rule stays at the + # public ``to_geotiff`` boundary: the array-level writer is the + # sharper tool that the in-repo reader and the internal tests use + # with small spec-noncompliant tiles, so pass + # ``require_multiple_of_16=False`` here (issue #2997). + if tiled or cog: + from ._validation import _validate_tile_size_arg + _validate_tile_size_arg(tile_size, require_multiple_of_16=False) # Auto-promote ``float16`` and ``bool_`` before the dtype mapper. # ``to_geotiff`` already does this upstream; the @@ -552,11 +550,14 @@ def _write(data: np.ndarray, path: str, *, # identically. # # Defense in depth: require a positive tile_size - # before entering the halving loop. The public ``to_geotiff`` - # entry point already validates tile_size when either tiled or - # cog is true, but ``write()`` is also reachable from internal - # callers; without this guard a non-positive tile_size leaves - # the ``oh > tile_size`` termination condition permanently true + # before entering the halving loop. The top-of-``_write`` + # ``if tiled or cog`` gate (and the public ``to_geotiff`` + # entry point) already reject a non-positive tile_size before + # reaching here, so this branch is unreachable at runtime. + # It is kept deliberately as the loop-side defense pinned by + # ``test_inner_overview_loop_guard_message_is_pinned`` (#2311): + # without this guard a non-positive tile_size leaves the + # ``oh > tile_size`` termination condition permanently true # while the inner ``oh > 0`` guard suppresses appends, so the # loop spins forever. if (not isinstance(tile_size, (int, np.integer)) @@ -762,6 +763,19 @@ def _write_streaming(dask_data, path: str, *, entry_point="_write_streaming", ) + # Push down the tile_size positivity/type contract. The streaming + # layout block below computes ``math.ceil(width / tw)`` with + # ``tw = tile_size``, so ``tile_size=0`` raises a bare + # ``ZeroDivisionError``. ``to_geotiff`` validates this upstream, so + # it is a no-op on that path; the gate matters for direct callers of + # ``_write_streaming``. Only the tiled layout consumes tile_size (the + # strip path ignores it). As with ``_write``, the multiple-of-16 spec + # rule stays at the public boundary, so pass + # ``require_multiple_of_16=False`` (issue #2997). + if tiled: + from ._validation import _validate_tile_size_arg + _validate_tile_size_arg(tile_size, require_multiple_of_16=False) + height, width = dask_data.shape[:2] samples = dask_data.shape[2] if dask_data.ndim == 3 else 1 dtype = dask_data.dtype diff --git a/xrspatial/geotiff/tests/write/test_basic.py b/xrspatial/geotiff/tests/write/test_basic.py index 0b827d6be..57d7a5dac 100644 --- a/xrspatial/geotiff/tests/write/test_basic.py +++ b/xrspatial/geotiff/tests/write/test_basic.py @@ -1982,6 +1982,88 @@ def test_write_streaming_rejects_bool_crs_epsg(self, tmp_path): _write_streaming(arr, out, crs_epsg=True) +class TestTileSizePushdown: + """The array-level writers used to under-enforce ``tile_size``: + ``_write_streaming`` skipped the check entirely (``tile_size=0`` hit a + bare ``ZeroDivisionError`` in the ``math.ceil(width / tw)`` layout + math) and ``_write`` only checked positivity under ``cog=True``. Both + now reject the crash-inducing cases (non-int, non-positive) on any + path that consumes ``tile_size``. + + The multiple-of-16 TIFF spec rule is deliberately NOT enforced here: + the array-level writer is the lower-level tool, and the in-repo reader + plus the existing COG/round-trip tests use small spec-noncompliant + tiles (4, 8). That policy stays at the public ``to_geotiff`` boundary + (issue #2997).""" + + @pytest.mark.parametrize("tile_size", [0, -1, 256.0]) + def test_write_tiled_rejects_bad_tile_size(self, tmp_path, tile_size): + arr = _make_uint8_band_2138() + out = str(tmp_path / f"tmp_2997_write_tiled_{tile_size}.tif") + with pytest.raises(ValueError, match="tile_size"): + _write(arr, out, tiled=True, tile_size=tile_size) + + @pytest.mark.parametrize("tile_size", [0, -1, 256.0]) + def test_write_streaming_rejects_bad_tile_size(self, tmp_path, + tile_size): + arr = dsk.from_array(_make_uint8_band_2138(), chunks=(16, 16)) + out = str(tmp_path / f"tmp_2997_stream_tiled_{tile_size}.tif") + with pytest.raises(ValueError, match="tile_size"): + _write_streaming(arr, out, tiled=True, tile_size=tile_size) + + def test_write_cog_rejects_zero_tile_size(self, tmp_path): + """``cog=True`` consumes ``tile_size`` for tiling and overview + generation; a non-positive value must still be rejected.""" + arr = _make_uint8_band_2138(shape=(64, 64)) + out = str(tmp_path / "tmp_2997_write_cog_0.tif") + with pytest.raises(ValueError, match="tile_size"): + _write(arr, out, tiled=True, cog=True, tile_size=0) + + def test_write_tiled_allows_non_multiple_of_16(self, tmp_path): + """The array-level writer keeps accepting small spec-noncompliant + tiles; the multiple-of-16 rule is a public-boundary policy.""" + arr = _make_uint8_band_2138() + out = str(tmp_path / "tmp_2997_write_tiled_17.tif") + _write(arr, out, tiled=True, tile_size=17) + assert os.path.exists(out) and os.path.getsize(out) > 0 + + def test_write_streaming_allows_non_multiple_of_16(self, tmp_path): + arr = dsk.from_array(_make_uint8_band_2138(), chunks=(16, 16)) + out = str(tmp_path / "tmp_2997_stream_tiled_17.tif") + _write_streaming(arr, out, tiled=True, tile_size=17) + assert os.path.exists(out) and os.path.getsize(out) > 0 + + def test_write_strip_ignores_tile_size(self, tmp_path): + """``tiled=False`` does not consume ``tile_size``; the gate must + not fire there.""" + arr = _make_uint8_band_2138() + out = str(tmp_path / "tmp_2997_write_strip_0.tif") + _write(arr, out, tiled=False, tile_size=0) + assert os.path.exists(out) and os.path.getsize(out) > 0 + + def test_write_streaming_strip_ignores_tile_size(self, tmp_path): + arr = dsk.from_array(_make_uint8_band_2138(), chunks=(16, 16)) + out = str(tmp_path / "tmp_2997_stream_strip_0.tif") + _write_streaming(arr, out, tiled=False, tile_size=0) + assert os.path.exists(out) and os.path.getsize(out) > 0 + + def test_write_tiled_valid_tile_size_writes(self, tmp_path): + arr = _make_uint8_band_2138() + out = str(tmp_path / "tmp_2997_write_tiled_16.tif") + _write(arr, out, tiled=True, tile_size=16) + assert os.path.exists(out) and os.path.getsize(out) > 0 + + def test_public_path_still_enforces_multiple_of_16(self, tmp_path): + """Guard against accidentally loosening the public boundary: the + multiple-of-16 rule must still fire from ``to_geotiff``.""" + import xarray as xr + arr = _make_uint8_band_2138() + da = xr.DataArray(arr, dims=["y", "x"]) + out = str(tmp_path / "tmp_2997_public_17.tif") + with pytest.raises(ValueError, match=r"multiple of 16"): + to_geotiff(da, out, tiled=True, tile_size=17) + + class TestNanToSentinelDefensiveCopy: """``to_geotiff`` rewrites NaN pixels to the nodata sentinel via ``arr.copy()`` so the caller's buffer is never mutated. Direct