xarray-contrib · brendancol · Jun 3, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026
diff --git a/xrspatial/focal.py b/xrspatial/focal.py
@@ -33,6 +33,31 @@ class cupy(object):
                              ngjit)
 
 
+def _validate_binary_kernel(kernel, func_name):
+    """Reject non-binary kernels for the mask-based focal APIs.
+
+    ``apply`` and ``focal_stats`` document the kernel as "2D array where
+    values of 1 indicate the kernel" -- a binary membership mask, not a
+    weight array.  The CPU and GPU code paths disagree on what a value
+    other than 0 or 1 means: ``_apply_numpy`` only copies cells where
+    ``kernel == 1`` (so a weight of 2 is dropped), while the GPU sum/mean
+    kernels treat every nonzero cell as a weight (``w * v``).  The result
+    is backend-dependent output for the same call.
+
+    Weighting belongs inside the user-supplied ``func`` (see the ``apply``
+    docstring example) or in ``convolve_2d`` / ``hotspots``, which handle
+    weighted kernels directly.  Reject anything that is not strictly 0/1
+    here so all four backends agree.
+    """
+    if not np.all((kernel == 0) | (kernel == 1)):
+        raise ValueError(
+            f"{func_name}(): kernel must be binary (only 0 and 1 values, "
+            "no other weights or NaN); it is used as a membership mask, not "
+            "a weight array. Apply per-cell weights inside the func argument, "
+            "or use convolve_2d for a weighted convolution."
+        )
+
+
 def _check_kernel_vs_raster_memory(kernel, rows, cols, func_name):
     """Reject kernel + raster combinations that would OOM the host.
 
@@ -552,7 +577,11 @@ def apply(agg=None, kernel=None, func=_calc_mean, name='focal_apply',
         CuPy backed, Dask with NumPy backed, or Dask with CuPy backed
         DataArray.
     kernel : numpy.ndarray
-        2D array where values of 1 indicate the kernel.
+        2D binary array where values of 1 indicate the kernel. The kernel
+        is a membership mask, not a weight array; only 0 and 1 are allowed
+        and any other value raises a ValueError. Apply per-cell weights
+        inside ``func`` (see the example below), or use
+        ``xrspatial.convolution.convolve_2d`` for a weighted convolution.
     func : callable, default=xrspatial.focal._calc_mean
         Function which takes an input array and returns an array.
         For cupy and dask+cupy backends the function must be a
@@ -666,6 +695,7 @@ def apply(agg=None, kernel=None, func=_calc_mean, name='focal_apply',
 
     # Validate the kernel
     kernel = custom_kernel(kernel)
+    _validate_binary_kernel(kernel, func_name='apply')
 
     _validate_boundary(boundary)
 
@@ -1229,7 +1259,10 @@ def focal_stats(agg,
         CuPy backed, Dask with NumPy backed, or Dask with CuPy backed
         DataArray.
     kernel : numpy.array
-        2D array where values of 1 indicate the kernel.
+        2D binary array where values of 1 indicate the kernel. The kernel
+        is a membership mask, not a weight array; only 0 and 1 are allowed
+        and any other value raises a ValueError. For a weighted convolution
+        use ``xrspatial.convolution.convolve_2d`` instead.
     stats_funcs: list of string
         List of statistics types to be calculated.
         Default set to ['mean', 'max', 'min', 'range', 'std', 'var',
@@ -1297,6 +1330,7 @@ def focal_stats(agg,
 
     # Validate the kernel
     kernel = custom_kernel(kernel)
+    _validate_binary_kernel(kernel, func_name='focal_stats')
 
     _validate_boundary(boundary)
 

diff --git a/xrspatial/tests/test_dask_laziness.py b/xrspatial/tests/test_dask_laziness.py
@@ -118,12 +118,14 @@ def test_mean(self, elev):
 
     def test_apply(self, elev):
         from xrspatial.focal import apply as focal_apply
-        kernel = np.ones((3, 3), dtype=np.float32) / 9
+        # apply() takes a binary membership mask, not a weighted kernel.
+        kernel = np.ones((3, 3), dtype=np.float32)
         assert _is_lazy(focal_apply(elev, kernel))
 
     def test_focal_stats(self, elev):
         from xrspatial.focal import focal_stats
-        kernel = np.ones((3, 3), dtype=np.float32) / 9
+        # focal_stats() takes a binary membership mask, not a weighted kernel.
+        kernel = np.ones((3, 3), dtype=np.float32)
         result = focal_stats(elev, kernel)
         assert _is_lazy(result)
 

diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py
@@ -636,6 +636,107 @@ def test_focal_stats_preserves_float64(backend):
     assert _compute_dtype(result) == np.float64
 
 
+# --- non-binary kernel rejection (issue-2848) -----------------------------
+# apply() and focal_stats() document the kernel as a binary membership mask
+# ("values of 1 indicate the kernel"). The CPU path only kept cells equal to
+# 1 (dropping a weight of 2 entirely) while the GPU sum/mean kernels weighted
+# every nonzero cell by its value, so the same non-binary kernel produced
+# backend-dependent output. Both APIs now reject non-binary kernels on every
+# backend, so the inconsistency cannot arise.
+
+NON_BINARY_KERNELS = [
+    np.array([[0, 2, 0], [2, 2, 2], [0, 2, 0]]),   # all weights are 2
+    np.array([[0, 1, 0], [1, 2, 1], [0, 1, 0]]),   # mixed 1 and 2
+    np.array([[0, 0.5, 0], [0.5, 1, 0.5], [0, 0.5, 0]]),  # fractional weights
+]
+
+
+@pytest.mark.parametrize("backend", ['numpy', 'cupy', 'dask+numpy', 'dask+cupy'])
+@pytest.mark.parametrize("kernel", NON_BINARY_KERNELS)
+def test_apply_rejects_non_binary_kernel(backend, kernel):
+    from xrspatial.tests.general_checks import has_cuda_and_cupy
+    if 'cupy' in backend and not has_cuda_and_cupy():
+        pytest.skip("Requires CUDA and CuPy")
+    if 'dask' in backend and da is None:
+        pytest.skip("Requires Dask")
+
+    data = np.arange(20, dtype=np.float64).reshape(4, 5)
+    agg = create_test_raster(data, backend=backend, chunks=(2, 3))
+
+    with pytest.raises(ValueError, match="kernel must be binary"):
+        apply(agg, kernel)
+
+
+@pytest.mark.parametrize("backend", ['numpy', 'cupy', 'dask+numpy', 'dask+cupy'])
+@pytest.mark.parametrize("kernel", NON_BINARY_KERNELS)
+def test_focal_stats_rejects_non_binary_kernel(backend, kernel):
+    from xrspatial.tests.general_checks import has_cuda_and_cupy
+    if 'cupy' in backend and not has_cuda_and_cupy():
+        pytest.skip("Requires CUDA and CuPy")
+    if 'dask' in backend and da is None:
+        pytest.skip("Requires Dask")
+
+    data = np.arange(20, dtype=np.float64).reshape(4, 5)
+    agg = create_test_raster(data, backend=backend, chunks=(2, 3))
+
+    with pytest.raises(ValueError, match="kernel must be binary"):
+        focal_stats(agg, kernel, stats_funcs=['mean', 'sum'])
+
+
+@pytest.mark.parametrize("backend", ['numpy', 'cupy', 'dask+numpy', 'dask+cupy'])
+def test_apply_and_focal_stats_accept_binary_kernel(backend):
+    # The binary 0/1 contract still works on every backend after the guard.
+    from xrspatial.tests.general_checks import has_cuda_and_cupy
+    if 'cupy' in backend and not has_cuda_and_cupy():
+        pytest.skip("Requires CUDA and CuPy")
+    if 'dask' in backend and da is None:
+        pytest.skip("Requires Dask")
+
+    data = np.arange(20, dtype=np.float64).reshape(4, 5)
+    kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
+    agg = create_test_raster(data, backend=backend, chunks=(2, 3))
+
+    if 'cupy' in backend:
+        from xrspatial.focal import _focal_mean_cuda
+        func = _focal_mean_cuda
+    else:
+        from xrspatial.focal import _calc_mean
+        func = _calc_mean
+
+    apply_result = apply(agg, kernel, func)
+    general_output_checks(agg, apply_result)
+
+    stats_result = focal_stats(agg, kernel, stats_funcs=['mean', 'sum'])
+    assert stats_result.ndim == 3
+
+
+def test_apply_rejects_nan_kernel():
+    # A NaN kernel cell is neither 0 nor 1, so it is rejected like any other
+    # non-binary value (the error message calls out NaN explicitly).
+    data = np.arange(20, dtype=np.float64).reshape(4, 5)
+    kernel = np.array([[0, np.nan, 0], [1, 1, 1], [0, 1, 0]])
+    agg = xr.DataArray(data, dims=['y', 'x'])
+    with pytest.raises(ValueError, match="kernel must be binary"):
+        apply(agg, kernel)
+
+
+def test_apply_focal_stats_agree_on_binary_kernel_numpy():
+    # Reference invariant the guard protects: on a binary kernel,
+    # focal_stats(mean) and apply(mean) agree (focal_stats delegates to
+    # apply on the CPU). This is the consistency the issue is about; with a
+    # non-binary kernel the two would have diverged, which is now blocked.
+    data = np.arange(20, dtype=np.float64).reshape(4, 5)
+    kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
+    agg = xr.DataArray(data, dims=['y', 'x'])
+
+    from xrspatial.focal import _calc_mean
+    apply_mean = apply(agg, kernel, _calc_mean)
+    stats_mean = focal_stats(agg, kernel, stats_funcs=['mean']).sel(stats='mean')
+
+    np.testing.assert_allclose(
+        apply_mean.data, stats_mean.data, equal_nan=True)
+
+
 @pytest.mark.parametrize("backend", ['numpy', 'cupy', 'dask+numpy', 'dask+cupy'])
 def test_apply_keeps_float32(backend):
     # The other side of the contract: a float32 input must not be promoted