diff --git a/CHANGELOG.md b/CHANGELOG.md index b706a578d..2eb36737a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,15 @@ #### Fixed +- `reproject` now rejects an explicit `nodata=` value that does not + fit the source/output integer dtype range. Previously the worker's + cast-back step silently wrapped the sentinel (e.g. `-9999` in a + `uint8` array landed at `0`), so out-of-bounds output pixels were + the same as valid zero pixels while `attrs['nodata']` still + advertised `-9999`. Explicit out-of-range values now raise + `ValueError`; attrs-derived out-of-range values (legacy files + such as `uint16 + nodata=-9999`) emit a `UserWarning` and fall + back to the dtype-appropriate sentinel. (#2572) - `zonal.stats` on the cupy backend now agrees with the numpy and dask backends on two edge cases. A zone whose values are all NaN (or all equal to `nodata_values`) is preserved in the output with NaN stats diff --git a/xrspatial/reproject/__init__.py b/xrspatial/reproject/__init__.py index 1c38b4f6d..adb115e94 100644 --- a/xrspatial/reproject/__init__.py +++ b/xrspatial/reproject/__init__.py @@ -680,7 +680,12 @@ def reproject( resampling : str One of 'nearest', 'bilinear', 'cubic'. nodata : float or None - Nodata value. Auto-detected if None. + Nodata value. Auto-detected if None. For integer input dtypes, + an explicit value that does not fit the dtype range raises + ``ValueError`` (e.g. ``nodata=-9999`` with a ``uint8`` raster). + Attrs/rioxarray-derived out-of-range values emit a + ``UserWarning`` and fall back to ``dtype.min`` for signed or + ``dtype.max`` for unsigned so legacy files still load (#2572). transform_precision : int Control-grid subdivisions for the coordinate transform (default 16). Higher values increase accuracy at the cost of more pyproj calls. diff --git a/xrspatial/reproject/_crs_utils.py b/xrspatial/reproject/_crs_utils.py index 03712f89e..0bf18d705 100644 --- a/xrspatial/reproject/_crs_utils.py +++ b/xrspatial/reproject/_crs_utils.py @@ -5,6 +5,8 @@ """ from __future__ import annotations +import warnings + import numpy as np from xrspatial.reproject._lite_crs import CRS as LiteCRS @@ -135,24 +137,61 @@ def _detect_nodata(raster, nodata=None, dtype=None): is rejected. Integer dtypes can't carry NaN. When *dtype* is integer the - resolved nodata is checked against the dtype: a NaN coming from - upstream (or the absent-upstream default) is swapped for a - dtype-appropriate sentinel via :func:`_default_integer_nodata` - (signed -> ``dtype.min``, unsigned -> ``dtype.max``). Without that - swap, the worker's cast-back step silently turned every - out-of-bounds pixel into ``0`` while ``attrs['nodata']`` still - advertised NaN (#2185). + resolved nodata is checked against the dtype: + + * NaN (from upstream or the absent-upstream default) is swapped + for a dtype-appropriate sentinel via + :func:`_default_integer_nodata` (signed -> ``dtype.min``, + unsigned -> ``dtype.max``). Without that swap, the worker's + cast-back step silently turned every out-of-bounds pixel into + ``0`` while ``attrs['nodata']`` still advertised NaN (#2185). + * A finite value that does not fit the dtype range is rejected + when the caller passed it explicitly (raises ``ValueError``), + or swapped for the dtype default with a ``UserWarning`` when it + came in via attrs/rioxarray. Without that guard, the worker's + cast-back step wraps the sentinel (e.g. ``-9999`` into + ``uint8`` becomes ``0``) while ``attrs['nodata']`` still + advertises the original value (#2572). """ nd = _detect_nodata_raw(raster, nodata) - # For integer outputs, swap any resolved NaN for a sentinel that - # actually fits the dtype. Finite values (including the - # user-supplied ones) pass through untouched so explicit nodata - # always wins. - if dtype is not None and np.isnan(nd): + if dtype is not None: dt = np.dtype(dtype) if np.issubdtype(dt, np.integer): - return _default_integer_nodata(dt) + if np.isnan(nd): + # Swap NaN for a dtype-appropriate sentinel. + return _default_integer_nodata(dt) + + info = np.iinfo(dt) + if not (info.min <= nd <= info.max): + # Finite but out of range. Explicit arg = hard error; + # implicit (attrs/rioxarray) = swap with warning so + # legacy files (e.g. uint16 + nodata=-9999) still load. + if nodata is not None: + raise ValueError( + f"nodata={nodata!r} cannot be represented in " + f"dtype {dt}: valid range is " + f"[{info.min}, {info.max}]. The worker's cast " + f"step would silently wrap the sentinel (e.g. " + f"-9999 into uint8 becomes 0), so out-of-range " + f"output pixels would be indistinguishable " + f"from valid data. Pass a representable nodata " + f"or use a wider dtype." + ) + fallback = _default_integer_nodata(dt) + # stacklevel=3 surfaces the warning at the public + # reproject() call (user -> reproject -> _detect_nodata + # -> warn) so the user sees the location they can act on. + warnings.warn( + f"Raster attrs declare nodata={nd!r}, which is " + f"outside the {dt} range " + f"[{info.min}, {info.max}]. Using {fallback!r} " + f"instead so the worker's cast-back step does not " + f"silently wrap the sentinel (#2572).", + UserWarning, + stacklevel=3, + ) + return fallback return nd diff --git a/xrspatial/tests/test_reproject.py b/xrspatial/tests/test_reproject.py index ccbdcd0e9..ef60606d8 100644 --- a/xrspatial/tests/test_reproject.py +++ b/xrspatial/tests/test_reproject.py @@ -139,6 +139,123 @@ def test_detect_nodata_from_attrs(self): assert val == -1.0 +class TestDetectNodataDtypeRange: + """Regression coverage for #2572. + + Explicit out-of-range nodata used to silently wrap during the + worker's cast-back step (e.g. ``-9999`` in a ``uint8`` array landed + at ``0``) while ``attrs['nodata']`` kept advertising the original + value. ``_detect_nodata`` must reject the explicit case and warn + on the attrs-derived case. + """ + + def test_explicit_negative_nodata_for_uint8_raises(self): + from xrspatial.reproject._crs_utils import _detect_nodata + raster = _make_raster(np.zeros((4, 4), dtype=np.uint8)) + with pytest.raises(ValueError, match="uint8"): + _detect_nodata(raster, nodata=-9999, dtype=np.uint8) + + def test_explicit_too_large_nodata_for_uint16_raises(self): + from xrspatial.reproject._crs_utils import _detect_nodata + raster = _make_raster(np.zeros((4, 4), dtype=np.uint16)) + with pytest.raises(ValueError, match="uint16"): + _detect_nodata(raster, nodata=70000, dtype=np.uint16) + + def test_explicit_in_range_nodata_passes(self): + """Boundary case: dtype.max stays untouched.""" + from xrspatial.reproject._crs_utils import _detect_nodata + raster = _make_raster(np.zeros((4, 4), dtype=np.uint8)) + assert _detect_nodata(raster, nodata=255, dtype=np.uint8) == 255.0 + + def test_explicit_in_range_signed_min(self): + from xrspatial.reproject._crs_utils import _detect_nodata + raster = _make_raster(np.zeros((4, 4), dtype=np.int16)) + assert _detect_nodata(raster, nodata=-32768, dtype=np.int16) == -32768.0 + + def test_attrs_out_of_range_warns_and_falls_back(self): + """Legacy files (uint16 + nodata=-9999) should warn, not crash.""" + from xrspatial.reproject._crs_utils import _detect_nodata + # nodata in attrs is out of range for uint16 + raster = _make_raster(np.zeros((4, 4), dtype=np.uint16), nodata=-9999) + with pytest.warns(UserWarning, match="uint16"): + val = _detect_nodata(raster, dtype=np.uint16) + # Falls back to dtype.max for unsigned + assert val == float(np.iinfo(np.uint16).max) + + +class TestReprojectIntegerNodataDtypeRange: + """End-to-end regression coverage for #2572 through ``reproject()``.""" + + def test_reproject_uint8_negative_nodata_raises(self): + from xrspatial.reproject import reproject + arr = (np.ones((4, 4), dtype=np.uint8) * 10) + da_obj = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.linspace(40, 30, 4), + 'x': np.linspace(-5, 5, 4)}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match="uint8"): + reproject(da_obj, 'EPSG:4326', nodata=-9999) + + def test_reproject_uint16_too_large_nodata_raises(self): + from xrspatial.reproject import reproject + arr = (np.ones((4, 4), dtype=np.uint16) * 10) + da_obj = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.linspace(40, 30, 4), + 'x': np.linspace(-5, 5, 4)}, + attrs={'crs': 'EPSG:4326'}, + ) + with pytest.raises(ValueError, match="uint16"): + reproject(da_obj, 'EPSG:4326', nodata=70000) + + def test_reproject_uint8_in_range_nodata_writes_correct_pixels(self): + """Happy path: representable nodata produces non-corrupted output + where unfilled pixels carry the declared sentinel. + """ + from xrspatial.reproject import reproject + arr = (np.ones((4, 4), dtype=np.uint8) * 10) + da_obj = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.linspace(40, 30, 4), + 'x': np.linspace(-5, 5, 4)}, + attrs={'crs': 'EPSG:4326'}, + ) + # Expand bounds so the output has nodata around the edges. + out = reproject( + da_obj, 'EPSG:4326', nodata=255, + bounds=(-20, 20, 20, 50), + ) + assert out.dtype == np.uint8 + assert out.attrs['nodata'] == 255.0 + # The sentinel actually appears in the array (no silent wrap). + n_nodata = int((out.values == 255).sum()) + assert n_nodata > 0 + # The entire top row sits above the source extent (y=40 is the + # source max), so it must be all nodata, not a mix of 0 and 255. + assert (out.values[0, :] == 255).all(), ( + f"top row should be all nodata=255, got {out.values[0, :]}" + ) + + def test_reproject_int16_negative_nodata_works(self): + from xrspatial.reproject import reproject + arr = (np.ones((4, 4), dtype=np.int16) * 10) + da_obj = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.linspace(40, 30, 4), + 'x': np.linspace(-5, 5, 4)}, + attrs={'crs': 'EPSG:4326'}, + ) + out = reproject( + da_obj, 'EPSG:4326', nodata=-32768, + bounds=(-20, 20, 20, 50), + ) + assert out.dtype == np.int16 + assert out.attrs['nodata'] == -32768.0 + assert (out.values == -32768).any() + + # --------------------------------------------------------------------------- # ApproximateTransform # ---------------------------------------------------------------------------