Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 40 additions & 12 deletions xrspatial/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1134,8 +1134,12 @@ def _resolve_nodata(agg, nodata):
``nodata`` in ``agg.attrs``. Returns ``None`` when no sentinel was
found (the caller skips the masking step).

NaN sentinels are returned as NaN so the caller can branch on
``np.isnan`` rather than ``==`` (which never matches NaN).
For floating-point inputs the sentinel is returned as a Python
``float`` so the caller can branch on ``np.isnan`` rather than
``==`` (which never matches NaN). For integer / bool inputs the
sentinel is cast to the input dtype so the comparison happens in
integer space -- routing it through ``float`` would lose precision
for int64 values above 2**53.
"""
if nodata is None:
for key in ('_FillValue', 'nodata'):
Expand All @@ -1145,10 +1149,25 @@ def _resolve_nodata(agg, nodata):
break
if nodata is None:
return None
nd = float(nodata)
if np.isinf(nd):
raise ValueError(f"nodata must be finite or NaN, got {nodata!r}")
return nd
if np.issubdtype(agg.dtype, np.floating):
nd = float(nodata)
if np.isinf(nd):
raise ValueError(f"nodata must be finite or NaN, got {nodata!r}")
return nd
# Integer / bool input: keep the sentinel in the input's native
# dtype so the equality test in _apply_nodata_mask compares
# integer-to-integer. A NaN sentinel can never match an integer
# value, so signal a no-op mask by returning NaN unchanged.
if isinstance(nodata, float) and np.isnan(nodata):
return float('nan')
# Reject fractional float sentinels for integer inputs -- silently
# truncating to int would mask cells the caller never asked to mask.
if isinstance(nodata, float) and not nodata.is_integer():
raise ValueError(
f"nodata={nodata!r} is not representable in integer dtype "
f"{agg.dtype}; pass an integer sentinel instead."
)
return np.asarray(nodata).astype(agg.dtype).item()


def _apply_nodata_mask(agg, nodata):
Expand All @@ -1159,13 +1178,22 @@ def _apply_nodata_mask(agg, nodata):
"""
if nodata is None:
return agg
# Promote to float so NaN can be stored. xr.where keeps the backend.
# Integer / bool inputs become float32 (consistent with _output_dtype).
if not np.issubdtype(agg.dtype, np.floating):
is_float_input = np.issubdtype(agg.dtype, np.floating)
# For floating-point input a NaN sentinel needs no replacement
# (NaN is already the output convention). For integer input a NaN
# sentinel can never match any cell, so the mask is a no-op; still
# promote to float so downstream NaN handling has somewhere to
# write its sentinels.
if is_float_input and isinstance(nodata, float) and np.isnan(nodata):
return agg
# Compare in the input dtype FIRST so integer comparisons keep
# full precision (float64 cannot represent int64 values above
# 2**53 without rounding). Then promote to float so NaN can be
# stored in the masked output.
mask = agg != nodata
if not is_float_input:
agg = agg.astype(np.float32)
if np.isnan(nodata):
return agg # already-NaN sentinels need no replacement
return agg.where(agg != nodata)
return agg.where(mask)


@supports_dataset
Expand Down
102 changes: 102 additions & 0 deletions xrspatial/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1434,3 +1434,105 @@ def test_explicit_nodata_overrides_attr(self):
# Without override the attr would say -999 (no match) and -1 would
# leak through; with override the top-left output pixel is NaN.
assert np.isnan(out.values[0, 0])


# ---------------------------------------------------------------------------
# Integer nodata precision (issue #2570)
# ---------------------------------------------------------------------------

class TestNodataIntegerPrecision:
"""Regression coverage for #2570 -- nodata equality lost precision
when the sentinel was routed through float() before comparison."""

def test_int64_sentinel_above_2pow53_preserves_distinct_values(self):
# float(2**60 + 1) == float(2**60); the old code masked the
# valid 2**60 cell because the rounded sentinel collided with it.
sentinel = (1 << 60) + 1
valid = 1 << 60
data = np.full((4, 4), sentinel, dtype=np.int64)
data[1, 1] = valid
agg = create_test_raster(data, attrs={'res': (1.0, 1.0)})
out = resample(agg, scale_factor=1.0, method='nearest',
nodata=sentinel)
# Valid cell survives AND keeps the right value (float32-quantized).
assert out.values[1, 1] == np.float32(valid)
# Sentinel cells are NaN.
assert np.isnan(out.values[0, 0])

def test_int64_sentinel_via_fillvalue_attr(self):
# Same precision case but the sentinel arrives via _FillValue
# rather than the explicit kwarg.
sentinel = (1 << 60) + 1
valid = 1 << 60
data = np.full((4, 4), sentinel, dtype=np.int64)
data[1, 1] = valid
agg = create_test_raster(
data, attrs={'res': (1.0, 1.0), '_FillValue': sentinel}
)
out = resample(agg, scale_factor=1.0, method='nearest')
assert np.isfinite(out.values[1, 1])
assert np.isnan(out.values[0, 0])

def test_int32_sentinel_unaffected(self):
# int32 cannot trip the float-cast precision loss, but the new
# code path still needs to work end-to-end.
sentinel = np.int32(-2147483648) # int32 min
data = np.array([
[sentinel, sentinel, 10, 10],
[sentinel, sentinel, 10, 10],
[20, 20, 30, 30],
[20, 20, 30, 30],
], dtype=np.int32)
agg = create_test_raster(data, attrs={'res': (1.0, 1.0)})
out = resample(agg, scale_factor=0.5, method='nearest',
nodata=int(sentinel))
assert np.isnan(out.values[0, 0])
assert np.isfinite(out.values[1, 1])

def test_uint16_sentinel(self):
# uint16 max as sentinel -- exercises an unsigned integer dtype.
sentinel = np.uint16(65535)
data = np.array([
[sentinel, sentinel, 10, 10],
[sentinel, sentinel, 10, 10],
[20, 20, 30, 30],
[20, 20, 30, 30],
], dtype=np.uint16)
agg = create_test_raster(data, attrs={'res': (1.0, 1.0)})
out = resample(agg, scale_factor=0.5, method='nearest',
nodata=int(sentinel))
assert np.isnan(out.values[0, 0])
assert np.isfinite(out.values[1, 1])

def test_float_nan_sentinel_still_short_circuits(self):
# Pre-existing float-NaN behaviour: pixels already NaN stay NaN,
# no equality comparison is attempted (NaN != NaN).
data = np.array([[np.nan, np.nan, 5.0, 5.0],
[np.nan, np.nan, 5.0, 5.0],
[3.0, 3.0, 7.0, 7.0],
[3.0, 3.0, 7.0, 7.0]], dtype=np.float32)
agg = create_test_raster(data, attrs={'res': (1.0, 1.0)})
out = resample(agg, scale_factor=0.5, method='nearest',
nodata=float('nan'))
assert np.isnan(out.values[0, 0])
assert np.isfinite(out.values[1, 1])

def test_float_finite_sentinel_unchanged(self):
# Sanity: the float path still routes through float comparison
# and masks the right cells.
data = np.array([[-1.0, -1.0, 5.0, 5.0],
[-1.0, -1.0, 5.0, 5.0],
[3.0, 3.0, 7.0, 7.0],
[3.0, 3.0, 7.0, 7.0]], dtype=np.float64)
agg = create_test_raster(data, attrs={'res': (1.0, 1.0)})
out = resample(agg, scale_factor=0.5, method='nearest', nodata=-1.0)
assert np.isnan(out.values[0, 0])
assert np.isfinite(out.values[1, 1])

def test_fractional_float_sentinel_on_int_input_raises(self):
# Silently truncating -9999.5 to -9999 on int input would mask
# cells the caller never asked to mask -- reject up front.
data = np.zeros((4, 4), dtype=np.int32)
agg = create_test_raster(data, attrs={'res': (1.0, 1.0)})
with pytest.raises(ValueError, match="not representable"):
resample(agg, scale_factor=0.5, method='nearest', nodata=-9999.5)
Loading