Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 36 additions & 2 deletions xrspatial/focal.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,31 @@ class cupy(object):
ngjit)


def _validate_binary_kernel(kernel, func_name):
"""Reject non-binary kernels for the mask-based focal APIs.

``apply`` and ``focal_stats`` document the kernel as "2D array where
values of 1 indicate the kernel" -- a binary membership mask, not a
weight array. The CPU and GPU code paths disagree on what a value
other than 0 or 1 means: ``_apply_numpy`` only copies cells where
``kernel == 1`` (so a weight of 2 is dropped), while the GPU sum/mean
kernels treat every nonzero cell as a weight (``w * v``). The result
is backend-dependent output for the same call.

Weighting belongs inside the user-supplied ``func`` (see the ``apply``
docstring example) or in ``convolve_2d`` / ``hotspots``, which handle
weighted kernels directly. Reject anything that is not strictly 0/1
here so all four backends agree.
"""
if not np.all((kernel == 0) | (kernel == 1)):
raise ValueError(
f"{func_name}(): kernel must be binary (only 0 and 1 values, "
"no other weights or NaN); it is used as a membership mask, not "
"a weight array. Apply per-cell weights inside the func argument, "
"or use convolve_2d for a weighted convolution."
)


def _check_kernel_vs_raster_memory(kernel, rows, cols, func_name):
"""Reject kernel + raster combinations that would OOM the host.

Expand Down Expand Up @@ -552,7 +577,11 @@ def apply(agg=None, kernel=None, func=_calc_mean, name='focal_apply',
CuPy backed, Dask with NumPy backed, or Dask with CuPy backed
DataArray.
kernel : numpy.ndarray
2D array where values of 1 indicate the kernel.
2D binary array where values of 1 indicate the kernel. The kernel
is a membership mask, not a weight array; only 0 and 1 are allowed
and any other value raises a ValueError. Apply per-cell weights
inside ``func`` (see the example below), or use
``xrspatial.convolution.convolve_2d`` for a weighted convolution.
func : callable, default=xrspatial.focal._calc_mean
Function which takes an input array and returns an array.
For cupy and dask+cupy backends the function must be a
Expand Down Expand Up @@ -666,6 +695,7 @@ def apply(agg=None, kernel=None, func=_calc_mean, name='focal_apply',

# Validate the kernel
kernel = custom_kernel(kernel)
_validate_binary_kernel(kernel, func_name='apply')

_validate_boundary(boundary)

Expand Down Expand Up @@ -1229,7 +1259,10 @@ def focal_stats(agg,
CuPy backed, Dask with NumPy backed, or Dask with CuPy backed
DataArray.
kernel : numpy.array
2D array where values of 1 indicate the kernel.
2D binary array where values of 1 indicate the kernel. The kernel
is a membership mask, not a weight array; only 0 and 1 are allowed
and any other value raises a ValueError. For a weighted convolution
use ``xrspatial.convolution.convolve_2d`` instead.
stats_funcs: list of string
List of statistics types to be calculated.
Default set to ['mean', 'max', 'min', 'range', 'std', 'var',
Expand Down Expand Up @@ -1297,6 +1330,7 @@ def focal_stats(agg,

# Validate the kernel
kernel = custom_kernel(kernel)
_validate_binary_kernel(kernel, func_name='focal_stats')

_validate_boundary(boundary)

Expand Down
6 changes: 4 additions & 2 deletions xrspatial/tests/test_dask_laziness.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,14 @@ def test_mean(self, elev):

def test_apply(self, elev):
from xrspatial.focal import apply as focal_apply
kernel = np.ones((3, 3), dtype=np.float32) / 9
# apply() takes a binary membership mask, not a weighted kernel.
kernel = np.ones((3, 3), dtype=np.float32)
assert _is_lazy(focal_apply(elev, kernel))

def test_focal_stats(self, elev):
from xrspatial.focal import focal_stats
kernel = np.ones((3, 3), dtype=np.float32) / 9
# focal_stats() takes a binary membership mask, not a weighted kernel.
kernel = np.ones((3, 3), dtype=np.float32)
result = focal_stats(elev, kernel)
assert _is_lazy(result)

Expand Down
101 changes: 101 additions & 0 deletions xrspatial/tests/test_focal.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,107 @@ def test_focal_stats_preserves_float64(backend):
assert _compute_dtype(result) == np.float64


# --- non-binary kernel rejection (issue-2848) -----------------------------
# apply() and focal_stats() document the kernel as a binary membership mask
# ("values of 1 indicate the kernel"). The CPU path only kept cells equal to
# 1 (dropping a weight of 2 entirely) while the GPU sum/mean kernels weighted
# every nonzero cell by its value, so the same non-binary kernel produced
# backend-dependent output. Both APIs now reject non-binary kernels on every
# backend, so the inconsistency cannot arise.

NON_BINARY_KERNELS = [
np.array([[0, 2, 0], [2, 2, 2], [0, 2, 0]]), # all weights are 2
np.array([[0, 1, 0], [1, 2, 1], [0, 1, 0]]), # mixed 1 and 2
np.array([[0, 0.5, 0], [0.5, 1, 0.5], [0, 0.5, 0]]), # fractional weights
]


@pytest.mark.parametrize("backend", ['numpy', 'cupy', 'dask+numpy', 'dask+cupy'])
@pytest.mark.parametrize("kernel", NON_BINARY_KERNELS)
def test_apply_rejects_non_binary_kernel(backend, kernel):
from xrspatial.tests.general_checks import has_cuda_and_cupy
if 'cupy' in backend and not has_cuda_and_cupy():
pytest.skip("Requires CUDA and CuPy")
if 'dask' in backend and da is None:
pytest.skip("Requires Dask")

data = np.arange(20, dtype=np.float64).reshape(4, 5)
agg = create_test_raster(data, backend=backend, chunks=(2, 3))

with pytest.raises(ValueError, match="kernel must be binary"):
apply(agg, kernel)


@pytest.mark.parametrize("backend", ['numpy', 'cupy', 'dask+numpy', 'dask+cupy'])
@pytest.mark.parametrize("kernel", NON_BINARY_KERNELS)
def test_focal_stats_rejects_non_binary_kernel(backend, kernel):
from xrspatial.tests.general_checks import has_cuda_and_cupy
if 'cupy' in backend and not has_cuda_and_cupy():
pytest.skip("Requires CUDA and CuPy")
if 'dask' in backend and da is None:
pytest.skip("Requires Dask")

data = np.arange(20, dtype=np.float64).reshape(4, 5)
agg = create_test_raster(data, backend=backend, chunks=(2, 3))

with pytest.raises(ValueError, match="kernel must be binary"):
focal_stats(agg, kernel, stats_funcs=['mean', 'sum'])


@pytest.mark.parametrize("backend", ['numpy', 'cupy', 'dask+numpy', 'dask+cupy'])
def test_apply_and_focal_stats_accept_binary_kernel(backend):
# The binary 0/1 contract still works on every backend after the guard.
from xrspatial.tests.general_checks import has_cuda_and_cupy
if 'cupy' in backend and not has_cuda_and_cupy():
pytest.skip("Requires CUDA and CuPy")
if 'dask' in backend and da is None:
pytest.skip("Requires Dask")

data = np.arange(20, dtype=np.float64).reshape(4, 5)
kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
agg = create_test_raster(data, backend=backend, chunks=(2, 3))

if 'cupy' in backend:
from xrspatial.focal import _focal_mean_cuda
func = _focal_mean_cuda
else:
from xrspatial.focal import _calc_mean
func = _calc_mean

apply_result = apply(agg, kernel, func)
general_output_checks(agg, apply_result)

stats_result = focal_stats(agg, kernel, stats_funcs=['mean', 'sum'])
assert stats_result.ndim == 3


def test_apply_rejects_nan_kernel():
# A NaN kernel cell is neither 0 nor 1, so it is rejected like any other
# non-binary value (the error message calls out NaN explicitly).
data = np.arange(20, dtype=np.float64).reshape(4, 5)
kernel = np.array([[0, np.nan, 0], [1, 1, 1], [0, 1, 0]])
agg = xr.DataArray(data, dims=['y', 'x'])
with pytest.raises(ValueError, match="kernel must be binary"):
apply(agg, kernel)


def test_apply_focal_stats_agree_on_binary_kernel_numpy():
# Reference invariant the guard protects: on a binary kernel,
# focal_stats(mean) and apply(mean) agree (focal_stats delegates to
# apply on the CPU). This is the consistency the issue is about; with a
# non-binary kernel the two would have diverged, which is now blocked.
data = np.arange(20, dtype=np.float64).reshape(4, 5)
kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
agg = xr.DataArray(data, dims=['y', 'x'])

from xrspatial.focal import _calc_mean
apply_mean = apply(agg, kernel, _calc_mean)
stats_mean = focal_stats(agg, kernel, stats_funcs=['mean']).sel(stats='mean')

np.testing.assert_allclose(
apply_mean.data, stats_mean.data, equal_nan=True)


@pytest.mark.parametrize("backend", ['numpy', 'cupy', 'dask+numpy', 'dask+cupy'])
def test_apply_keeps_float32(backend):
# The other side of the contract: a float32 input must not be promoted
Expand Down
Loading