diff --git a/xrspatial/tests/test_zonal.py b/xrspatial/tests/test_zonal.py index 2f4c19b0a..0d59481ec 100644 --- a/xrspatial/tests/test_zonal.py +++ b/xrspatial/tests/test_zonal.py @@ -2554,3 +2554,33 @@ def test_stats_return_type_invalid_rejected_for_dataset_2558( ds = xr.Dataset({'v': values}) with pytest.raises(ValueError, match="return_type"): stats(zones=zones, values=ds, return_type='bogus') + + +def test_strides_int64_no_overflow_2612(): + """`_strides` must accumulate counts in int64, not int32. + + The counter runs up to the flattened pixel count. In an int32 array + inside the numba kernel, a count above 2**31-1 wraps silently to a + negative value, corrupting the slice bounds that `_calc_stats` and + crosstab derive from it. See issue #2612. + """ + from xrspatial.zonal import _strides + + # A real >2**31-element input would need ~17 GB to allocate, so the + # dtype assertion below stands in for the actual overflow case; a + # small input is enough to lock in the int64 contract and correctness. + flatten_zones = np.array([0, 0, 0, 1, 1, 2], dtype=np.int64) + unique_zones = np.array([0, 1, 2], dtype=np.int64) + + strides = _strides(flatten_zones, unique_zones) + + # The result dtype is the load-bearing guarantee: int64 cannot wrap + # at realistic raster sizes (~2.1 billion pixels) the way int32 does. + assert strides.dtype == np.int64 + + # Cumulative boundaries stay non-negative and non-decreasing, and the + # final stride equals the number of finite elements. + expected = np.array([3, 5, 6], dtype=np.int64) + np.testing.assert_array_equal(strides, expected) + assert np.all(strides >= 0) + assert np.all(np.diff(strides) >= 0) diff --git a/xrspatial/zonal.py b/xrspatial/zonal.py index c2333b9c5..4f3b36309 100644 --- a/xrspatial/zonal.py +++ b/xrspatial/zonal.py @@ -284,7 +284,7 @@ def _parallel_variance(block_counts, block_sums, block_m2s): def _strides(flatten_zones, unique_zones): num_elements = flatten_zones.shape[0] num_zones = len(unique_zones) - strides = np.zeros(len(unique_zones), dtype=np.int32) + strides = np.zeros(len(unique_zones), dtype=np.int64) count = 0 for i in range(num_zones):