Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .claude/sweep-performance-state.csv
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ interpolate-kriging,2026-06-04,SAFE,graph-bound,0,2923,"MEDIUM: memory guard use
interpolate_spline,2026-06-04,SAFE,compute-bound,0,,"scope=spline-only. Audited _spline.py + _validation.py only (not _idw/_kriging). 1 MEDIUM (Cat3 GPU transfer): _spline_dask_cupy/_spline_cupy re-uploaded invariant x_pts/y_pts/weights host->device once per chunk. Fixed in PR #2929: added _tps_evaluate_gpu taking on-device point/weight arrays + only per-chunk grid slices; dask+cupy uploads invariants once at graph build (verified 48->3 on 16 chunks, scales with chunk count). numpy/cupy/dask+cupy parity ~1e-14. Added cupy+dask+cupy parity tests and an upload-count regression test (red without fix: 48!=3). _tps_cuda_kernel 30 regs/thread, 6 scalar locals -- no register pressure. CPU/dask+numpy eval @ngjit, row-major, no materialization. Dask graph probe 2560x2560/256 chunks = 200 tasks (2/chunk), no fan-in. Memory guard _check_spline_memory bounds N^2 solve. No issue filed -- gh issue create denied by auto-mode classifier; finding surfaced directly by sweep. GitHub issue field left empty."
kde,2026-04-14T12:00:00Z,SAFE,compute-bound,0,,Graph construction serialized per-tile. _filter_points_to_tile scans all points per tile. No HIGH findings.
mahalanobis,2026-03-31T18:00:00Z,SAFE,compute-bound,0,,False positive. Numpy path materializes by design. Dask path uses lazy reductions + map_blocks.
mcda,2026-06-10,SAFE,memory-bound,2,3150,"2 HIGH fixed in PR #3158: owa() dask path crashed (da.sort does not exist; memory guard pointed users at the crashing path) and wpm validation ran one compute() per criterion. MEDIUM fixed in PR #3159 (#3151): cupy piecewise + dask+cupy piecewise/categorical raised TypeError via np.asarray on cupy chunks. MEDIUM fixed in PR #3160 (#3152): monte_carlo sensitivity materialized full dask dataset (now chunk-bounded map_blocks, ~8 tasks/chunk at n_samples=1000) and crashed on cupy via per-sample .values; constrain() deep copy dropped. LOW documented, not fixed: fuzzy_overlay builds ones via layers[0]*0+1; _categorical does one full-array pass per mapping key. Verdict SAFE assumes the 3 PRs merge (pre-fix: WILL OOM for MC-on-dask, owa dask broken). GPU paths validated on CUDA host (cupy 13.6)."
morphology,2026-03-31T18:00:00Z,SAFE,compute-bound,0,,
multispectral,2026-05-02,SAFE,compute-bound,0,,"Re-audit 2026-05-02 after PRs 1292 (true_color memory guard) and 1301 (validate_arrays in true_color). Verified SAFE. No HIGH. MEDIUM: da.stack in _true_color_dask/_true_color_dask_cupy at L1702/L1731 creates (1,1,1,1) chunks along band axis (4 bands so impact is minor, scheduling overhead not OOM). LOW: np.zeros((h,w,4)) at L1681 then full overwrite -- np.empty would suffice. All 17 indices use plain map_blocks with no halo; 8192x8192 ndvi graph is 80 tasks, evi/arvi/ebbi 112 tasks."
normalize,2026-03-31T18:00:00Z,SAFE,compute-bound,0,1124,Boolean indexing replaced with lazy nanmin/nanmax/nanmean/nanstd.
Expand Down
43 changes: 29 additions & 14 deletions xrspatial/mcda/combine.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,24 +167,39 @@ def _check_wpm_positive(criteria: xr.Dataset) -> None:
NaN with no error. NaN values are allowed through so the documented
NaN-propagation behaviour is preserved.
"""
bad = []
try:
import dask
import dask.array as da
except ImportError:
da = None

# Mask NaN so they pass through; we only want to flag <= 0.
names = []
mins = []
dask_positions = []
for var_name in criteria.data_vars:
arr = criteria[var_name].data
# Mask NaN so they pass through; we only want to flag <= 0.
try:
import dask.array as da
if isinstance(arr, da.Array):
# Compute once; cheap relative to the full product pass.
min_val = float(da.nanmin(arr).compute())
else:
min_val = float(np.nanmin(arr))
except ImportError:
min_val = float(np.nanmin(arr))
except ValueError:
# All-NaN slice; nothing to flag.
if arr.size == 0:
# Empty layer; nothing to flag.
continue
if da is not None and isinstance(arr, da.Array):
# Defer so every dask layer reduces in one scheduler pass
# below instead of one compute() per criterion.
dask_positions.append(len(names))
names.append(var_name)
mins.append(da.nanmin(arr))
else:
names.append(var_name)
mins.append(float(np.nanmin(arr)))
if dask_positions:
computed = dask.compute(*[mins[i] for i in dask_positions])
for i, value in zip(dask_positions, computed):
mins[i] = float(value)

bad = []
for name, min_val in zip(names, mins):
if not np.isnan(min_val) and min_val <= 0.0:
bad.append((var_name, min_val))
bad.append((name, min_val))
if bad:
details = ", ".join(f"{n!r} (min={v})" for n, v in bad)
raise ValueError(
Expand Down
175 changes: 175 additions & 0 deletions xrspatial/tests/test_mcda.py
Original file line number Diff line number Diff line change
Expand Up @@ -1349,6 +1349,179 @@ def test_oat_sensitivity_dask(self):
assert np.all(np.isfinite(result[var].compute().values))


@pytest.mark.skipif(not HAS_DASK, reason="Requires dask")
class TestOWADask:
    """Exercise owa() against dask-backed Datasets (#3150).

    Guards the regression where _sort_descending called da.sort, a
    function that does not exist, so the documented out-of-core path
    failed with AttributeError.
    """

    @pytest.fixture
    def numpy_and_dask_criteria(self):
        """Return the same three criteria as (numpy Dataset, dask Dataset)."""
        np.random.seed(3150)
        layers = {}
        for key in ["a", "b", "c"]:
            layers[key] = np.random.rand(20, 20)
        # Plant a single NaN so the comparisons below also see a NaN cell.
        layers["a"][3, 4] = np.nan
        eager = xr.Dataset({
            key: xr.DataArray(grid, dims=["y", "x"])
            for key, grid in layers.items()
        })
        lazy = eager.chunk({"y": 10, "x": 10})
        return eager, lazy

    @pytest.mark.parametrize(
        "chunks",
        [
            {"y": 10, "x": 10},
            # Ragged chunks: 20 is not a multiple of 7 or 9
            {"y": 7, "x": 9},
        ],
    )
    def test_owa_dask_matches_numpy(self, numpy_and_dask_criteria, chunks):
        """The dask result equals the numpy result for each chunk layout."""
        eager, _ = numpy_and_dask_criteria
        lazy = eager.chunk(chunks)
        criterion_weights = {"a": 0.4, "b": 0.35, "c": 0.25}
        order_weights = [0.5, 0.3, 0.2]
        expected = owa(eager, criterion_weights, order_weights)
        actual = owa(lazy, criterion_weights, order_weights)
        # The result must still be lazy before values are requested.
        assert hasattr(actual.data, "compute")
        np.testing.assert_allclose(
            actual.compute().values,
            expected.values,
            atol=1e-14,
            equal_nan=True,
        )

    def test_owa_dask_uniform_order_weights_equals_wlc(
        self, numpy_and_dask_criteria,
    ):
        """With equal order weights, owa() and wlc() agree on dask input."""
        lazy = numpy_and_dask_criteria[1]
        criterion_weights = {"a": 0.4, "b": 0.35, "c": 0.25}
        from_owa = owa(lazy, criterion_weights, [1 / 3] * 3)
        from_wlc = wlc(lazy, criterion_weights)
        np.testing.assert_allclose(
            from_owa.compute().values,
            from_wlc.compute().values,
            atol=1e-14,
            equal_nan=True,
        )


class TestOWACupy:
    """Exercise owa() on cupy and dask+cupy inputs (#3150).

    The order weights have to live on the device: feeding numpy
    operands into cupy kernels raises TypeError.
    """

    @pytest.fixture
    def numpy_criteria(self):
        """Return three seeded 20x20 random criteria as a numpy Dataset."""
        np.random.seed(3150)
        layers = {}
        for key in ["a", "b", "c"]:
            layers[key] = xr.DataArray(
                np.random.rand(20, 20), dims=["y", "x"],
            )
        return xr.Dataset(layers)

    @pytest.fixture
    def owa_args(self):
        """Return (criterion weights, order weights) shared by the tests."""
        criterion_weights = {"a": 0.4, "b": 0.35, "c": 0.25}
        order_weights = [0.5, 0.3, 0.2]
        return criterion_weights, order_weights

    @cuda_and_cupy_available
    def test_owa_cupy_matches_numpy(self, numpy_criteria, owa_args):
        """A cupy-backed Dataset gives the numpy answer, on the device."""
        import cupy
        criterion_weights, order_weights = owa_args
        device_layers = {}
        for key in numpy_criteria.data_vars:
            on_device = cupy.asarray(numpy_criteria[key].values)
            device_layers[key] = xr.DataArray(on_device, dims=["y", "x"])
        device_ds = xr.Dataset(device_layers)
        expected = owa(numpy_criteria, criterion_weights, order_weights)
        actual = owa(device_ds, criterion_weights, order_weights)
        assert isinstance(actual.data, cupy.ndarray)
        np.testing.assert_allclose(
            actual.data.get(), expected.values, atol=1e-14,
        )

    @cuda_and_cupy_available
    @pytest.mark.skipif(not HAS_DASK, reason="Requires dask")
    def test_owa_dask_cupy_matches_numpy(self, numpy_criteria, owa_args):
        """A dask+cupy Dataset gives the numpy answer, on the device."""
        import cupy
        criterion_weights, order_weights = owa_args
        chunked_layers = {}
        for key in numpy_criteria.data_vars:
            on_device = cupy.asarray(numpy_criteria[key].values)
            chunked = da.from_array(on_device, chunks=(10, 10))
            chunked_layers[key] = xr.DataArray(chunked, dims=["y", "x"])
        chunked_ds = xr.Dataset(chunked_layers)
        expected = owa(numpy_criteria, criterion_weights, order_weights)
        lazy_result = owa(chunked_ds, criterion_weights, order_weights)
        materialized = lazy_result.data.compute()
        # Computing must not pull the result back to the host.
        assert isinstance(materialized, cupy.ndarray)
        np.testing.assert_allclose(
            materialized.get(), expected.values, atol=1e-14,
        )


@pytest.mark.skipif(not HAS_DASK, reason="Requires dask")
class TestWPMDask:
    """Exercise wpm() validation on dask-backed Datasets (#3150).

    Guards the regression where _check_wpm_positive issued one
    .compute() per criterion; the nanmin reductions are now batched
    into a single scheduler pass.
    """

    @pytest.fixture
    def dask_criteria(self):
        """Return three seeded criteria, each chunked along y."""
        np.random.seed(3151)
        layers = {}
        for key in ["a", "b", "c"]:
            # Scale and shift the [0, 1) draws so no value is <= 0.
            values = np.random.rand(20, 20) * 0.8 + 0.1
            layers[key] = xr.DataArray(
                values, dims=["y", "x"],
            ).chunk({"y": 10})
        return xr.Dataset(layers)

    def test_wpm_dask_matches_numpy(self, dask_criteria):
        """The lazy wpm() result equals the one from materialized input."""
        criterion_weights = {"a": 0.4, "b": 0.35, "c": 0.25}
        lazy_result = wpm(dask_criteria, criterion_weights)
        assert hasattr(lazy_result.data, "compute")
        expected = wpm(dask_criteria.compute(), criterion_weights)
        np.testing.assert_allclose(
            lazy_result.compute().values, expected.values, atol=1e-14,
        )

    def test_wpm_dask_validation_single_scheduler_pass(
        self, dask_criteria, monkeypatch,
    ):
        """wpm() calls dask.compute exactly once, with three operands."""
        import dask
        real_compute = dask.compute
        recorded = []

        def spy_compute(*args, **kwargs):
            # Remember the positional operands, then defer to the real call.
            recorded.append(args)
            return real_compute(*args, **kwargs)

        monkeypatch.setattr(dask, "compute", spy_compute)
        wpm(dask_criteria, {"a": 0.4, "b": 0.35, "c": 0.25})
        # A single batched pass covering all three criteria, not one each.
        assert len(recorded) == 1
        assert len(recorded[0]) == 3

    def test_wpm_dask_rejects_non_positive(self):
        """A dask layer containing a negative value raises ValueError."""
        all_positive = np.array([[0.5, 0.2], [0.3, 0.4]])
        has_negative = np.array([[0.5, -0.2], [0.3, 0.4]])
        bad = xr.Dataset({
            "a": xr.DataArray(all_positive, dims=["y", "x"]).chunk({"y": 1}),
            "b": xr.DataArray(has_negative, dims=["y", "x"]).chunk({"y": 1}),
        })
        with pytest.raises(ValueError, match="non-positive"):
            wpm(bad, {"a": 0.5, "b": 0.5})


class TestWPMEdgeCases:
def test_all_ones(self):
"""All criteria at 1.0 should produce 1.0 regardless of weights."""
Expand Down Expand Up @@ -2538,6 +2711,7 @@ def test_matches_numpy(self, label, fn):
_assert_combine_matches_numpy("dask+cupy", fn)

def test_owa_matches_numpy(self):
# Fixed in #3158
_assert_combine_matches_numpy(
"dask+cupy", lambda ds: owa(ds, COMBINE_WEIGHTS, [0.7, 0.3]),
)
Expand Down Expand Up @@ -2721,6 +2895,7 @@ def test_constrain_coords_and_dims(self):
)

def test_constrain_attrs(self):
# Fixed in #3154: constrain preserves input attrs
suit, mask = _constrain_inputs("numpy")
result = constrain(suit, exclude=[mask])
assert result.attrs == suit.attrs
Expand Down
Loading