diff --git a/xrspatial/resample.py b/xrspatial/resample.py index c316590a3..fd9fcaeb9 100644 --- a/xrspatial/resample.py +++ b/xrspatial/resample.py @@ -1134,8 +1134,12 @@ def _resolve_nodata(agg, nodata): ``nodata`` in ``agg.attrs``. Returns ``None`` when no sentinel was found (the caller skips the masking step). - NaN sentinels are returned as NaN so the caller can branch on - ``np.isnan`` rather than ``==`` (which never matches NaN). + For floating-point inputs the sentinel is returned as a Python + ``float`` so the caller can branch on ``np.isnan`` rather than + ``==`` (which never matches NaN). For integer / bool inputs the + sentinel is cast to the input dtype so the comparison happens in + integer space -- routing it through ``float`` would lose precision + for int64 values above 2**53. """ if nodata is None: for key in ('_FillValue', 'nodata'): @@ -1145,10 +1149,25 @@ def _resolve_nodata(agg, nodata): break if nodata is None: return None - nd = float(nodata) - if np.isinf(nd): - raise ValueError(f"nodata must be finite or NaN, got {nodata!r}") - return nd + if np.issubdtype(agg.dtype, np.floating): + nd = float(nodata) + if np.isinf(nd): + raise ValueError(f"nodata must be finite or NaN, got {nodata!r}") + return nd + # Integer / bool input: keep the sentinel in the input's native + # dtype so the equality test in _apply_nodata_mask compares + # integer-to-integer. A NaN sentinel can never match an integer + # value, so signal a no-op mask by returning NaN unchanged. + if isinstance(nodata, float) and np.isnan(nodata): + return float('nan') + # Reject fractional float sentinels for integer inputs -- silently + # truncating to int would mask cells the caller never asked to mask. + if isinstance(nodata, float) and not nodata.is_integer(): + raise ValueError( + f"nodata={nodata!r} is not representable in integer dtype " + f"{agg.dtype}; pass an integer sentinel instead." + ) + return np.asarray(nodata).astype(agg.dtype).item() def _apply_nodata_mask(agg, nodata): @@ -1159,13 +1178,22 @@ def _apply_nodata_mask(agg, nodata): """ if nodata is None: return agg - # Promote to float so NaN can be stored. xr.where keeps the backend. - # Integer / bool inputs become float32 (consistent with _output_dtype). - if not np.issubdtype(agg.dtype, np.floating): + is_float_input = np.issubdtype(agg.dtype, np.floating) + # For floating-point input a NaN sentinel needs no replacement + # (NaN is already the output convention). For integer input a NaN + # sentinel can never match any cell, so the mask is a no-op; still + # promote to float so downstream NaN handling has somewhere to + # write its sentinels. + if is_float_input and isinstance(nodata, float) and np.isnan(nodata): + return agg + # Compare in the input dtype FIRST so integer comparisons keep + # full precision (float64 cannot represent int64 values above + # 2**53 without rounding). Then promote to float so NaN can be + # stored in the masked output. + mask = agg != nodata + if not is_float_input: agg = agg.astype(np.float32) - if np.isnan(nodata): - return agg # already-NaN sentinels need no replacement - return agg.where(agg != nodata) + return agg.where(mask) @supports_dataset diff --git a/xrspatial/tests/test_resample.py b/xrspatial/tests/test_resample.py index 09b434bd7..74f311bfa 100644 --- a/xrspatial/tests/test_resample.py +++ b/xrspatial/tests/test_resample.py @@ -1434,3 +1434,105 @@ def test_explicit_nodata_overrides_attr(self): # Without override the attr would say -999 (no match) and -1 would # leak through; with override the top-left output pixel is NaN. assert np.isnan(out.values[0, 0]) + + +# --------------------------------------------------------------------------- +# Integer nodata precision (issue #2570) +# --------------------------------------------------------------------------- + +class TestNodataIntegerPrecision: + """Regression coverage for #2570 -- nodata equality lost precision + when the sentinel was routed through float() before comparison.""" + + def test_int64_sentinel_above_2pow53_preserves_distinct_values(self): + # float(2**60 + 1) == float(2**60); the old code masked the + # valid 2**60 cell because the rounded sentinel collided with it. + sentinel = (1 << 60) + 1 + valid = 1 << 60 + data = np.full((4, 4), sentinel, dtype=np.int64) + data[1, 1] = valid + agg = create_test_raster(data, attrs={'res': (1.0, 1.0)}) + out = resample(agg, scale_factor=1.0, method='nearest', + nodata=sentinel) + # Valid cell survives AND keeps the right value (float32-quantized). + assert out.values[1, 1] == np.float32(valid) + # Sentinel cells are NaN. + assert np.isnan(out.values[0, 0]) + + def test_int64_sentinel_via_fillvalue_attr(self): + # Same precision case but the sentinel arrives via _FillValue + # rather than the explicit kwarg. + sentinel = (1 << 60) + 1 + valid = 1 << 60 + data = np.full((4, 4), sentinel, dtype=np.int64) + data[1, 1] = valid + agg = create_test_raster( + data, attrs={'res': (1.0, 1.0), '_FillValue': sentinel} + ) + out = resample(agg, scale_factor=1.0, method='nearest') + assert np.isfinite(out.values[1, 1]) + assert np.isnan(out.values[0, 0]) + + def test_int32_sentinel_unaffected(self): + # int32 cannot trip the float-cast precision loss, but the new + # code path still needs to work end-to-end. + sentinel = np.int32(-2147483648) # int32 min + data = np.array([ + [sentinel, sentinel, 10, 10], + [sentinel, sentinel, 10, 10], + [20, 20, 30, 30], + [20, 20, 30, 30], + ], dtype=np.int32) + agg = create_test_raster(data, attrs={'res': (1.0, 1.0)}) + out = resample(agg, scale_factor=0.5, method='nearest', + nodata=int(sentinel)) + assert np.isnan(out.values[0, 0]) + assert np.isfinite(out.values[1, 1]) + + def test_uint16_sentinel(self): + # uint16 max as sentinel -- exercises an unsigned integer dtype. + sentinel = np.uint16(65535) + data = np.array([ + [sentinel, sentinel, 10, 10], + [sentinel, sentinel, 10, 10], + [20, 20, 30, 30], + [20, 20, 30, 30], + ], dtype=np.uint16) + agg = create_test_raster(data, attrs={'res': (1.0, 1.0)}) + out = resample(agg, scale_factor=0.5, method='nearest', + nodata=int(sentinel)) + assert np.isnan(out.values[0, 0]) + assert np.isfinite(out.values[1, 1]) + + def test_float_nan_sentinel_still_short_circuits(self): + # Pre-existing float-NaN behaviour: pixels already NaN stay NaN, + # no equality comparison is attempted (NaN != NaN). + data = np.array([[np.nan, np.nan, 5.0, 5.0], + [np.nan, np.nan, 5.0, 5.0], + [3.0, 3.0, 7.0, 7.0], + [3.0, 3.0, 7.0, 7.0]], dtype=np.float32) + agg = create_test_raster(data, attrs={'res': (1.0, 1.0)}) + out = resample(agg, scale_factor=0.5, method='nearest', + nodata=float('nan')) + assert np.isnan(out.values[0, 0]) + assert np.isfinite(out.values[1, 1]) + + def test_float_finite_sentinel_unchanged(self): + # Sanity: the float path still routes through float comparison + # and masks the right cells. + data = np.array([[-1.0, -1.0, 5.0, 5.0], + [-1.0, -1.0, 5.0, 5.0], + [3.0, 3.0, 7.0, 7.0], + [3.0, 3.0, 7.0, 7.0]], dtype=np.float64) + agg = create_test_raster(data, attrs={'res': (1.0, 1.0)}) + out = resample(agg, scale_factor=0.5, method='nearest', nodata=-1.0) + assert np.isnan(out.values[0, 0]) + assert np.isfinite(out.values[1, 1]) + + def test_fractional_float_sentinel_on_int_input_raises(self): + # Silently truncating -9999.5 to -9999 on int input would mask + # cells the caller never asked to mask -- reject up front. + data = np.zeros((4, 4), dtype=np.int32) + agg = create_test_raster(data, attrs={'res': (1.0, 1.0)}) + with pytest.raises(ValueError, match="not representable"): + resample(agg, scale_factor=0.5, method='nearest', nodata=-9999.5)