Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,15 @@

#### Fixed

- `reproject` now rejects an explicit `nodata=` value that does not
fit the source/output integer dtype range. Previously the worker's
cast-back step silently wrapped the sentinel (e.g. `-9999` in a
`uint8` array landed at `0`), so out-of-bounds output pixels were
the same as valid zero pixels while `attrs['nodata']` still
advertised `-9999`. Explicit out-of-range values now raise
`ValueError`; attrs-derived out-of-range values (legacy files
such as `uint16 + nodata=-9999`) emit a `UserWarning` and fall
back to the dtype-appropriate sentinel. (#2572)
- `zonal.stats` on the cupy backend now agrees with the numpy and dask
backends on two edge cases. A zone whose values are all NaN (or all
equal to `nodata_values`) is preserved in the output with NaN stats
Expand Down
7 changes: 6 additions & 1 deletion xrspatial/reproject/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,12 @@ def reproject(
resampling : str
One of 'nearest', 'bilinear', 'cubic'.
nodata : float or None
Nodata value. Auto-detected if None.
Nodata value. Auto-detected if None. For integer input dtypes,
an explicit value that does not fit the dtype range raises
``ValueError`` (e.g. ``nodata=-9999`` with a ``uint8`` raster).
Attrs/rioxarray-derived out-of-range values emit a
``UserWarning`` and fall back to ``dtype.min`` for signed or
``dtype.max`` for unsigned so legacy files still load (#2572).
transform_precision : int
Control-grid subdivisions for the coordinate transform (default 16).
Higher values increase accuracy at the cost of more pyproj calls.
Expand Down
65 changes: 52 additions & 13 deletions xrspatial/reproject/_crs_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"""
from __future__ import annotations

import warnings

import numpy as np

from xrspatial.reproject._lite_crs import CRS as LiteCRS
Expand Down Expand Up @@ -135,24 +137,61 @@ def _detect_nodata(raster, nodata=None, dtype=None):
is rejected.

Integer dtypes can't carry NaN. When *dtype* is integer the
resolved nodata is checked against the dtype: a NaN coming from
upstream (or the absent-upstream default) is swapped for a
dtype-appropriate sentinel via :func:`_default_integer_nodata`
(signed -> ``dtype.min``, unsigned -> ``dtype.max``). Without that
swap, the worker's cast-back step silently turned every
out-of-bounds pixel into ``0`` while ``attrs['nodata']`` still
advertised NaN (#2185).
resolved nodata is checked against the dtype:

* NaN (from upstream or the absent-upstream default) is swapped
for a dtype-appropriate sentinel via
:func:`_default_integer_nodata` (signed -> ``dtype.min``,
unsigned -> ``dtype.max``). Without that swap, the worker's
cast-back step silently turned every out-of-bounds pixel into
``0`` while ``attrs['nodata']`` still advertised NaN (#2185).
* A finite value that does not fit the dtype range is rejected
when the caller passed it explicitly (raises ``ValueError``),
or swapped for the dtype default with a ``UserWarning`` when it
came in via attrs/rioxarray. Without that guard, the worker's
cast-back step wraps the sentinel (e.g. ``-9999`` into
``uint8`` becomes ``0``) while ``attrs['nodata']`` still
advertises the original value (#2572).
"""
nd = _detect_nodata_raw(raster, nodata)

# For integer outputs, swap any resolved NaN for a sentinel that
# actually fits the dtype. Finite values (including the
# user-supplied ones) pass through untouched so explicit nodata
# always wins.
if dtype is not None and np.isnan(nd):
if dtype is not None:
dt = np.dtype(dtype)
if np.issubdtype(dt, np.integer):
return _default_integer_nodata(dt)
if np.isnan(nd):
# Swap NaN for a dtype-appropriate sentinel.
return _default_integer_nodata(dt)

info = np.iinfo(dt)
if not (info.min <= nd <= info.max):
# Finite but out of range. Explicit arg = hard error;
# implicit (attrs/rioxarray) = swap with warning so
# legacy files (e.g. uint16 + nodata=-9999) still load.
if nodata is not None:
raise ValueError(
f"nodata={nodata!r} cannot be represented in "
f"dtype {dt}: valid range is "
f"[{info.min}, {info.max}]. The worker's cast "
f"step would silently wrap the sentinel (e.g. "
f"-9999 into uint8 becomes 0), so out-of-range "
f"output pixels would be indistinguishable "
f"from valid data. Pass a representable nodata "
f"or use a wider dtype."
)
fallback = _default_integer_nodata(dt)
# stacklevel=3 surfaces the warning at the public
# reproject() call (user -> reproject -> _detect_nodata
# -> warn) so the user sees the location they can act on.
warnings.warn(
f"Raster attrs declare nodata={nd!r}, which is "
f"outside the {dt} range "
f"[{info.min}, {info.max}]. Using {fallback!r} "
f"instead so the worker's cast-back step does not "
f"silently wrap the sentinel (#2572).",
UserWarning,
stacklevel=3,
)
return fallback

return nd

Expand Down
117 changes: 117 additions & 0 deletions xrspatial/tests/test_reproject.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,123 @@ def test_detect_nodata_from_attrs(self):
assert val == -1.0


class TestDetectNodataDtypeRange:
"""Regression coverage for #2572.

Explicit out-of-range nodata used to silently wrap during the
worker's cast-back step (e.g. ``-9999`` in a ``uint8`` array landed
at ``0``) while ``attrs['nodata']`` kept advertising the original
value. ``_detect_nodata`` must reject the explicit case and warn
on the attrs-derived case.
"""

def test_explicit_negative_nodata_for_uint8_raises(self):
from xrspatial.reproject._crs_utils import _detect_nodata
raster = _make_raster(np.zeros((4, 4), dtype=np.uint8))
with pytest.raises(ValueError, match="uint8"):
_detect_nodata(raster, nodata=-9999, dtype=np.uint8)

def test_explicit_too_large_nodata_for_uint16_raises(self):
from xrspatial.reproject._crs_utils import _detect_nodata
raster = _make_raster(np.zeros((4, 4), dtype=np.uint16))
with pytest.raises(ValueError, match="uint16"):
_detect_nodata(raster, nodata=70000, dtype=np.uint16)

def test_explicit_in_range_nodata_passes(self):
"""Boundary case: dtype.max stays untouched."""
from xrspatial.reproject._crs_utils import _detect_nodata
raster = _make_raster(np.zeros((4, 4), dtype=np.uint8))
assert _detect_nodata(raster, nodata=255, dtype=np.uint8) == 255.0

def test_explicit_in_range_signed_min(self):
from xrspatial.reproject._crs_utils import _detect_nodata
raster = _make_raster(np.zeros((4, 4), dtype=np.int16))
assert _detect_nodata(raster, nodata=-32768, dtype=np.int16) == -32768.0

def test_attrs_out_of_range_warns_and_falls_back(self):
"""Legacy files (uint16 + nodata=-9999) should warn, not crash."""
from xrspatial.reproject._crs_utils import _detect_nodata
# nodata in attrs is out of range for uint16
raster = _make_raster(np.zeros((4, 4), dtype=np.uint16), nodata=-9999)
with pytest.warns(UserWarning, match="uint16"):
val = _detect_nodata(raster, dtype=np.uint16)
# Falls back to dtype.max for unsigned
assert val == float(np.iinfo(np.uint16).max)


class TestReprojectIntegerNodataDtypeRange:
"""End-to-end regression coverage for #2572 through ``reproject()``."""

def test_reproject_uint8_negative_nodata_raises(self):
from xrspatial.reproject import reproject
arr = (np.ones((4, 4), dtype=np.uint8) * 10)
da_obj = xr.DataArray(
arr, dims=['y', 'x'],
coords={'y': np.linspace(40, 30, 4),
'x': np.linspace(-5, 5, 4)},
attrs={'crs': 'EPSG:4326'},
)
with pytest.raises(ValueError, match="uint8"):
reproject(da_obj, 'EPSG:4326', nodata=-9999)

def test_reproject_uint16_too_large_nodata_raises(self):
from xrspatial.reproject import reproject
arr = (np.ones((4, 4), dtype=np.uint16) * 10)
da_obj = xr.DataArray(
arr, dims=['y', 'x'],
coords={'y': np.linspace(40, 30, 4),
'x': np.linspace(-5, 5, 4)},
attrs={'crs': 'EPSG:4326'},
)
with pytest.raises(ValueError, match="uint16"):
reproject(da_obj, 'EPSG:4326', nodata=70000)

def test_reproject_uint8_in_range_nodata_writes_correct_pixels(self):
"""Happy path: representable nodata produces non-corrupted output
where unfilled pixels carry the declared sentinel.
"""
from xrspatial.reproject import reproject
arr = (np.ones((4, 4), dtype=np.uint8) * 10)
da_obj = xr.DataArray(
arr, dims=['y', 'x'],
coords={'y': np.linspace(40, 30, 4),
'x': np.linspace(-5, 5, 4)},
attrs={'crs': 'EPSG:4326'},
)
# Expand bounds so the output has nodata around the edges.
out = reproject(
da_obj, 'EPSG:4326', nodata=255,
bounds=(-20, 20, 20, 50),
)
assert out.dtype == np.uint8
assert out.attrs['nodata'] == 255.0
# The sentinel actually appears in the array (no silent wrap).
n_nodata = int((out.values == 255).sum())
assert n_nodata > 0
# The entire top row sits above the source extent (y=40 is the
# source max), so it must be all nodata, not a mix of 0 and 255.
assert (out.values[0, :] == 255).all(), (
f"top row should be all nodata=255, got {out.values[0, :]}"
)

def test_reproject_int16_negative_nodata_works(self):
from xrspatial.reproject import reproject
arr = (np.ones((4, 4), dtype=np.int16) * 10)
da_obj = xr.DataArray(
arr, dims=['y', 'x'],
coords={'y': np.linspace(40, 30, 4),
'x': np.linspace(-5, 5, 4)},
attrs={'crs': 'EPSG:4326'},
)
out = reproject(
da_obj, 'EPSG:4326', nodata=-32768,
bounds=(-20, 20, 20, 50),
)
assert out.dtype == np.int16
assert out.attrs['nodata'] == -32768.0
assert (out.values == -32768).any()


# ---------------------------------------------------------------------------
# ApproximateTransform
# ---------------------------------------------------------------------------
Expand Down
Loading