Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 29 additions & 15 deletions xrspatial/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1292,6 +1292,27 @@ def _apply_nodata_mask(agg, nodata):
return agg.where(mask)


def _refresh_nodata_attrs(src_attrs, dst_attrs):
"""Refresh nodata sentinels in *dst_attrs* to NaN.

Resample replaces sentinel pixels with NaN regardless of input
dtype. If the input declared a sentinel via ``_FillValue``,
``nodatavals``, or the rasterio-style ``nodata`` attr, refresh each
one to NaN so the metadata matches the actual data. Keys absent on
the input stay absent. ``_resolve_nodata`` reads ``nodata`` as a
fallback, so a stale finite value there would silently mismatch the
masked data on any downstream consumer that trusts
``attrs['nodata']``.
"""
if '_FillValue' in src_attrs:
dst_attrs['_FillValue'] = float('nan')
if 'nodatavals' in src_attrs:
old = src_attrs['nodatavals']
dst_attrs['nodatavals'] = tuple(float('nan') for _ in old)
if 'nodata' in src_attrs:
dst_attrs['nodata'] = float('nan')


@supports_dataset
def resample(
agg: xr.DataArray,
Expand Down Expand Up @@ -1423,7 +1444,13 @@ def resample(
out.name = name
# When nodata was applied, advertise NaN as the new sentinel.
if has_nodata:
# Always advertise NaN via `_FillValue` -- this also covers the
# explicit `nodata=` case where the input carried no nodata
# attrs. Then refresh `nodata` / `nodatavals` for inputs that
# did declare them, so masked-to-NaN output never advertises a
# stale finite sentinel (the non-identity path does the same).
out.attrs['_FillValue'] = float('nan')
_refresh_nodata_attrs(agg.attrs, out.attrs)
return out

# -- 3D: dispatch per band ----------------------------------------------
Expand Down Expand Up @@ -1455,6 +1482,7 @@ def resample(
new_attrs.update(bands[0].attrs) # res from per-band resample
if has_nodata:
new_attrs['_FillValue'] = float('nan')
_refresh_nodata_attrs(agg.attrs, new_attrs)
result.attrs = new_attrs
# Preserve the leading-dim coordinate if it was on the input.
if leading_dim in agg.coords:
Expand Down Expand Up @@ -1524,21 +1552,7 @@ def _new_coords(vals, n_out):
px, 0.0, x_edge_start, 0.0, py, y_edge_start,
)

# Resample replaces sentinel pixels with NaN regardless of input
# dtype. If the input declared a sentinel via `_FillValue`,
# `nodatavals`, or the rasterio-style `nodata` attr, refresh each
# one to NaN so the metadata matches the actual data. Leave the
# keys absent when the input did not have them. `_resolve_nodata`
# reads `nodata` as a fallback, so we must refresh it too -- a
# stale finite value here would silently mismatch the masked data
# on any downstream consumer that trusts `attrs['nodata']`.
if '_FillValue' in agg.attrs:
new_attrs['_FillValue'] = float('nan')
if 'nodatavals' in agg.attrs:
old = agg.attrs['nodatavals']
new_attrs['nodatavals'] = tuple(float('nan') for _ in old)
if 'nodata' in agg.attrs:
new_attrs['nodata'] = float('nan')
_refresh_nodata_attrs(agg.attrs, new_attrs)

# Carry across scalar (zero-dim) non-dim coords like rioxarray's
# `spatial_ref` or a squeezed `time` / `band` selector. The
Expand Down
84 changes: 84 additions & 0 deletions xrspatial/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1538,6 +1538,90 @@ def test_explicit_nodata_overrides_attr(self):
assert np.isnan(out.values[0, 0])


# ---------------------------------------------------------------------------
# Identity fast path nodata metadata (issue #2662)
# ---------------------------------------------------------------------------

class TestIdentityNodataMetadata:
    """Regression tests for nodata attrs on the identity fast path.

    With ``scale_factor=1.0`` sentinels are masked to NaN, but only
    `_FillValue` used to be refreshed, so `nodata` and `nodatavals`
    kept advertising the stale finite sentinel. Every nodata attr the
    input declared must read NaN on the output, exactly as on the
    non-identity path.
    """

    _data = np.array([[-9999, -9999, 10, 10],
                      [-9999, -9999, 10, 10],
                      [20, 20, 30, 30],
                      [20, 20, 30, 30]], dtype=np.float32)

    # The attr handling under test is backend-independent and the mask
    # itself goes through xarray's `.where`. cupy backends are covered
    # by the cross-backend nodata tests elsewhere, so only numpy and
    # dask+numpy are parametrized here (same as the rest of TestNodata
    # for the masking path).
    @pytest.mark.parametrize('backend', ['numpy', 'dask+numpy'])
    def test_identity_refreshes_all_nodata_attrs(self, backend):
        if not _backend_available(backend):
            pytest.skip(f"backend {backend} unavailable")

        declared = {
            'res': (1.0, 1.0),
            'nodata': -9999,
            'nodatavals': (-9999,),
        }
        raster = create_test_raster(
            self._data.copy(), backend=backend, chunks=(2, 2),
            attrs=declared,
        )
        result = resample(raster, scale_factor=1.0)
        values = _to_numpy(result)
        result_attrs = result.attrs

        # The sentinel pixel was masked ...
        assert np.isnan(values[0, 0])
        # ... and none of the attrs keeps the finite sentinel.
        assert np.isnan(result_attrs['_FillValue'])
        assert np.isnan(result_attrs['nodata'])
        assert len(result_attrs['nodatavals']) == 1
        assert np.isnan(result_attrs['nodatavals'][0])

    def test_identity_matches_non_identity_attrs(self):
        # Identity and a genuine downsample have to agree on which
        # nodata attrs are NaN afterwards.
        declared = {
            'res': (1.0, 1.0),
            'nodata': -9999,
            'nodatavals': (-9999,),
            '_FillValue': -9999,
        }
        identity_in = create_test_raster(
            self._data.copy(), attrs=declared.copy())
        identity = resample(identity_in, scale_factor=1.0)
        downsample_in = create_test_raster(
            self._data.copy(), attrs=declared.copy())
        downsample = resample(
            downsample_in, scale_factor=0.5, method='nearest')

        for key in ('_FillValue', 'nodata', 'nodatavals'):
            from_identity = identity.attrs[key]
            from_downsample = downsample.attrs[key]
            # `nodatavals` is a sequence; compare its first entry.
            is_seq = key == 'nodatavals'
            assert (
                np.isnan(from_identity[0] if is_seq else from_identity)
                and np.isnan(
                    from_downsample[0] if is_seq else from_downsample)
            )

    def test_identity_absent_attrs_stay_absent(self):
        # No nodata attrs and no explicit param means nothing gets
        # masked, so the output must not grow any nodata attr.
        plain = np.arange(16, dtype=np.float32).reshape(4, 4)
        raster = create_test_raster(plain, attrs={'res': (1.0, 1.0)})
        result = resample(raster, scale_factor=1.0)
        for key in ('nodata', 'nodatavals', '_FillValue'):
            assert key not in result.attrs

    def test_identity_3d_refreshes_nodata_attrs(self):
        # Same gap on the 3D per-band dispatch path.
        first_band = self._data.copy()
        cube = np.stack([first_band, first_band + 100], axis=0)
        raster = create_test_raster(
            cube, dims=['band', 'y', 'x'],
            attrs={'res': (1.0, 1.0), 'nodata': -9999,
                   'nodatavals': (-9999,)},
        )
        result_attrs = resample(raster, scale_factor=1.0).attrs
        assert np.isnan(result_attrs['nodata'])
        assert np.isnan(result_attrs['nodatavals'][0])
        assert np.isnan(result_attrs['_FillValue'])


# ---------------------------------------------------------------------------
# Integer nodata precision (issue #2570)
# ---------------------------------------------------------------------------
Expand Down
Loading