Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions xrspatial/geotiff/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ def _validate_dtype_cast(source_dtype, target_dtype):
f"Cast explicitly after reading if you really want this.")


def _validate_tile_size(tile_size) -> None:
def _validate_tile_size(tile_size, *, require_multiple_of_16: bool = True
) -> None:
"""Validate ``tile_size`` for the tiled GeoTIFF writers.

Shared by ``to_geotiff`` (when ``tiled=True``) and
Expand All @@ -246,6 +247,15 @@ def _validate_tile_size(tile_size) -> None:
for broad interoperability with libtiff / GDAL strict readers; a
value like 17 would otherwise round-trip through the in-repo
reader but be rejected elsewhere.

``require_multiple_of_16`` defaults to ``True`` for the public
boundary (``to_geotiff`` / ``_write_geotiff_gpu``), which enforces
spec interop. The array-level private writers (``_write`` /
``_write_streaming``) pass ``False``: they still reject the
crash-inducing cases (non-int, non-positive) but allow the small
spec-noncompliant tile sizes that the in-repo reader round-trips
and that the internal tests rely on. The multiple-of-16 policy
stays at the public boundary (issue #2997).
"""
if not isinstance(tile_size, (int, np.integer)) or isinstance(
tile_size, bool):
Expand All @@ -255,7 +265,7 @@ def _validate_tile_size(tile_size) -> None:
if tile_size <= 0:
raise ValueError(
f"tile_size must be a positive int, got tile_size={tile_size}.")
if tile_size % 16 != 0:
if require_multiple_of_16 and tile_size % 16 != 0:
lower = (int(tile_size) // 16) * 16
upper = lower + 16
# ``lower`` is 0 for tile_size < 16; suppress it from the hint
Expand Down Expand Up @@ -324,14 +334,19 @@ def _validate_chunks_arg(chunks, *, allow_none=False):
f"(type {type(chunks).__name__}).")


def _validate_tile_size_arg(tile_size):
def _validate_tile_size_arg(tile_size, *,
require_multiple_of_16: bool = True):
"""Validate the ``tile_size`` kwarg for the tiled writer entry points.

Wrapper kept for backwards internal compatibility; delegates to
``_validate_tile_size`` so to_geotiff/_write_geotiff_gpu share one
validation path (positive int + multiple-of-16 for tiled output).
Pass ``require_multiple_of_16=False`` from the array-level private
writers, which enforce only the crash-inducing positivity/type rules
(issue #2997).
"""
_validate_tile_size(tile_size)
_validate_tile_size(
tile_size, require_multiple_of_16=require_multiple_of_16)


def _validate_overview_level_arg(overview_level) -> None:
Expand Down
58 changes: 36 additions & 22 deletions xrspatial/geotiff/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,23 +421,21 @@ def _write(data: np.ndarray, path: str, *,
entry_point="_write",
)

# Defense in depth for the COG auto-overview hang.
# ``to_geotiff`` already rejects non-positive tile_size when either
# tiled or cog is true, but ``_write`` is a public-ish array-level
# entry point reached from the GPU CPU-fallback path and downstream
# code that imports it directly. Without the gate, a non-positive
# tile_size with ``cog=True`` hits ``ZeroDivisionError`` in
# ``_write_tiled`` (tiled=True path) or the infinite auto-overview
# halving loop (tiled=False path). Convert both to a typed
# ValueError up front so callers see one actionable failure mode
# instead of two divergent ones.
if cog and (not isinstance(tile_size, (int, np.integer))
or isinstance(tile_size, bool)
or tile_size <= 0):
raise ValueError(
f"tile_size must be a positive int for cog=True (consumed by "
f"tile encoding and auto-overview generation), got "
f"tile_size={tile_size!r}.")
# Push down the tile_size positivity/type contract. ``_write`` is a
# public-ish array-level entry point reached from the GPU
# CPU-fallback path and downstream code that imports it directly,
# so the gate has to run here. Without it a non-positive tile_size
# hits ``ZeroDivisionError`` in ``_write_tiled`` (tiled=True) or the
# auto-overview halving loop (cog=True). Validate whenever either
# path will consume tile_size, matching the public wrapper's
# ``tiled or cog`` gate. The multiple-of-16 spec rule stays at the
# public ``to_geotiff`` boundary: the array-level writer is the
# sharper tool that the in-repo reader and the internal tests use
# with small spec-noncompliant tiles, so pass
# ``require_multiple_of_16=False`` here (issue #2997).
if tiled or cog:
from ._validation import _validate_tile_size_arg
_validate_tile_size_arg(tile_size, require_multiple_of_16=False)

# Auto-promote ``float16`` and ``bool_`` before the dtype mapper.
# ``to_geotiff`` already does this upstream; the
Expand Down Expand Up @@ -552,11 +550,14 @@ def _write(data: np.ndarray, path: str, *,
# identically.
#
# Defense in depth: require a positive tile_size
# before entering the halving loop. The public ``to_geotiff``
# entry point already validates tile_size when either tiled or
# cog is true, but ``write()`` is also reachable from internal
# callers; without this guard a non-positive tile_size leaves
# the ``oh > tile_size`` termination condition permanently true
# before entering the halving loop. The top-of-``_write``
# ``if tiled or cog`` gate (and the public ``to_geotiff``
# entry point) already reject a non-positive tile_size before
# reaching here, so this branch is unreachable at runtime.
# It is kept deliberately as the loop-side defense pinned by
# ``test_inner_overview_loop_guard_message_is_pinned`` (#2311):
# without this guard a non-positive tile_size leaves the
# ``oh > tile_size`` termination condition permanently true
# while the inner ``oh > 0`` guard suppresses appends, so the
# loop spins forever.
if (not isinstance(tile_size, (int, np.integer))
Expand Down Expand Up @@ -762,6 +763,19 @@ def _write_streaming(dask_data, path: str, *,
entry_point="_write_streaming",
)

# Push down the tile_size positivity/type contract. The streaming
# layout block below computes ``math.ceil(width / tw)`` with
# ``tw = tile_size``, so ``tile_size=0`` raises a bare
# ``ZeroDivisionError``. ``to_geotiff`` validates this upstream, so
# it is a no-op on that path; the gate matters for direct callers of
# ``_write_streaming``. Only the tiled layout consumes tile_size (the
# strip path ignores it). As with ``_write``, the multiple-of-16 spec
# rule stays at the public boundary, so pass
# ``require_multiple_of_16=False`` (issue #2997).
if tiled:
from ._validation import _validate_tile_size_arg
_validate_tile_size_arg(tile_size, require_multiple_of_16=False)

height, width = dask_data.shape[:2]
samples = dask_data.shape[2] if dask_data.ndim == 3 else 1
dtype = dask_data.dtype
Expand Down
82 changes: 82 additions & 0 deletions xrspatial/geotiff/tests/write/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1982,6 +1982,88 @@ def test_write_streaming_rejects_bool_crs_epsg(self, tmp_path):
_write_streaming(arr, out, crs_epsg=True)


class TestTileSizePushdown:
"""The array-level writers used to under-enforce ``tile_size``:
``_write_streaming`` skipped the check entirely (``tile_size=0`` hit a
bare ``ZeroDivisionError`` in the ``math.ceil(width / tw)`` layout
math) and ``_write`` only checked positivity under ``cog=True``. Both
now reject the crash-inducing cases (non-int, non-positive) on any
path that consumes ``tile_size``.

The multiple-of-16 TIFF spec rule is deliberately NOT enforced here:
the array-level writer is the lower-level tool, and the in-repo reader
plus the existing COG/round-trip tests use small spec-noncompliant
tiles (4, 8). That policy stays at the public ``to_geotiff`` boundary
(issue #2997)."""

@pytest.mark.parametrize("tile_size", [0, -1, 256.0])
def test_write_tiled_rejects_bad_tile_size(self, tmp_path, tile_size):
arr = _make_uint8_band_2138()
out = str(tmp_path / f"tmp_2997_write_tiled_{tile_size}.tif")
with pytest.raises(ValueError, match="tile_size"):
_write(arr, out, tiled=True, tile_size=tile_size)

@pytest.mark.parametrize("tile_size", [0, -1, 256.0])
def test_write_streaming_rejects_bad_tile_size(self, tmp_path,
tile_size):
arr = dsk.from_array(_make_uint8_band_2138(), chunks=(16, 16))
out = str(tmp_path / f"tmp_2997_stream_tiled_{tile_size}.tif")
with pytest.raises(ValueError, match="tile_size"):
_write_streaming(arr, out, tiled=True, tile_size=tile_size)

def test_write_cog_rejects_zero_tile_size(self, tmp_path):
"""``cog=True`` consumes ``tile_size`` for tiling and overview
generation; a non-positive value must still be rejected."""
arr = _make_uint8_band_2138(shape=(64, 64))
out = str(tmp_path / "tmp_2997_write_cog_0.tif")
with pytest.raises(ValueError, match="tile_size"):
_write(arr, out, tiled=True, cog=True, tile_size=0)

def test_write_tiled_allows_non_multiple_of_16(self, tmp_path):
"""The array-level writer keeps accepting small spec-noncompliant
tiles; the multiple-of-16 rule is a public-boundary policy."""
arr = _make_uint8_band_2138()
out = str(tmp_path / "tmp_2997_write_tiled_17.tif")
_write(arr, out, tiled=True, tile_size=17)
assert os.path.exists(out) and os.path.getsize(out) > 0

def test_write_streaming_allows_non_multiple_of_16(self, tmp_path):
arr = dsk.from_array(_make_uint8_band_2138(), chunks=(16, 16))
out = str(tmp_path / "tmp_2997_stream_tiled_17.tif")
_write_streaming(arr, out, tiled=True, tile_size=17)
assert os.path.exists(out) and os.path.getsize(out) > 0

def test_write_strip_ignores_tile_size(self, tmp_path):
"""``tiled=False`` does not consume ``tile_size``; the gate must
not fire there."""
arr = _make_uint8_band_2138()
out = str(tmp_path / "tmp_2997_write_strip_0.tif")
_write(arr, out, tiled=False, tile_size=0)
assert os.path.exists(out) and os.path.getsize(out) > 0

def test_write_streaming_strip_ignores_tile_size(self, tmp_path):
arr = dsk.from_array(_make_uint8_band_2138(), chunks=(16, 16))
out = str(tmp_path / "tmp_2997_stream_strip_0.tif")
_write_streaming(arr, out, tiled=False, tile_size=0)
assert os.path.exists(out) and os.path.getsize(out) > 0

def test_write_tiled_valid_tile_size_writes(self, tmp_path):
arr = _make_uint8_band_2138()
out = str(tmp_path / "tmp_2997_write_tiled_16.tif")
_write(arr, out, tiled=True, tile_size=16)
assert os.path.exists(out) and os.path.getsize(out) > 0

def test_public_path_still_enforces_multiple_of_16(self, tmp_path):
"""Guard against accidentally loosening the public boundary: the
multiple-of-16 rule must still fire from ``to_geotiff``."""
import xarray as xr
arr = _make_uint8_band_2138()
da = xr.DataArray(arr, dims=["y", "x"])
out = str(tmp_path / "tmp_2997_public_17.tif")
with pytest.raises(ValueError, match=r"multiple of 16"):
to_geotiff(da, out, tiled=True, tile_size=17)


class TestNanToSentinelDefensiveCopy:
"""``to_geotiff`` rewrites NaN pixels to the nodata sentinel via
``arr.copy()`` so the caller's buffer is never mutated. Direct
Expand Down
Loading