From d841a46a83fc1824911f54895973c49cd44371ad Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 9 Jun 2026 16:50:06 -0700 Subject: [PATCH 1/3] Fix bounded dask GREAT_CIRCLE proximity missing antimeridian targets (#3108) The map_overlap halo was sized as a linear sum of per-column parallel-arc steps, but great-circle distance is periodic in longitude and its chords shorten toward the poles, so array-space adjacency is not a lower bound on spherical distance. Derive the column halo from the chord bound 2R asin(cos(lat_max) |sin(dlon/2)|) and fold the x axis into a single chunk when the +/-180 seam or the 180-degree chord at worst-case latitude is within max_distance. Covers dask+numpy and dask+cupy (shared _halo_depth). --- xrspatial/proximity.py | 86 +++++++++++++++++++++++++++---- xrspatial/tests/test_proximity.py | 77 +++++++++++++++++++++++++++ 2 files changed, 152 insertions(+), 11 deletions(-) diff --git a/xrspatial/proximity.py b/xrspatial/proximity.py index 652a53169..a9d325b9f 100644 --- a/xrspatial/proximity.py +++ b/xrspatial/proximity.py @@ -284,6 +284,65 @@ def _check_monotonic_coords(x_coords, y_coords, x, y): ) +def _great_circle_col_halo(x_coords, y_coords, max_distance): + """Column halo depth (in pixels) for the GREAT_CIRCLE metric. + + Great-circle distance is periodic in longitude (haversine takes the + short way around the sphere) and its chords shorten toward the poles, + so a linear sum of per-column step distances is not a lower bound on + the spherical distance between two grid points. Use the chord bound + + dist(p, t) >= 2 * R * asin(cos(lat_max) * |sin(dlon / 2)|) + + which holds for every pair of grid points (``lat_max`` is the largest + absolute latitude on the raster). Inverting it for ``max_distance`` + gives ``dlon_max``: any pair separated by more than ``dlon_max`` + degrees of longitude (the short way around) is farther apart than + ``max_distance`` no matter the latitudes. + + When the bound cannot exclude anything -- ``max_distance`` reaches the + 180-degree chord at ``lat_max``, or targets across the +/-180 seam are + within reach (seam gap <= ``dlon_max``) -- no array-space halo can + cover the wrap. Return a depth one larger than the axis so + ``_fit_halo_to_chunks`` folds the x axis into a single chunk and every + chunk sees all columns. + """ + width = len(x_coords) + if width < 2: + return 0 + fold = width + 1 + + radius = 6378137.0 + half_angle = max_distance / (2.0 * radius) + if half_angle >= np.pi / 2.0: + # max_distance spans half the circumference: everything is in reach. + return fold + + cos_lat_max = np.cos(np.radians(np.abs(np.asarray(y_coords)).max())) + sin_half = np.sin(half_angle) + if sin_half >= cos_lat_max: + # Even a 180-degree longitude gap at the worst-case latitude stays + # within max_distance, so no longitude separation excludes a target. + return fold + + dlon_max = np.degrees(2.0 * np.arcsin(sin_half / cos_lat_max)) + + # Wrap check: the smallest longitude separation through the +/-180 seam + # is between the first and last columns. If that is within dlon_max, a + # target near one edge of the array can be the nearest target of a pixel + # near the opposite edge, which no per-chunk halo can express. + span = abs(float(x_coords[-1]) - float(x_coords[0])) + seam_gap = 360.0 - span + if seam_gap <= dlon_max: + return fold + + # No wrap in reach: a target k columns away is at least k * min_step + # degrees of longitude away (monotonic coords), so columns beyond + # dlon_max / min_step are excluded by the chord bound. + min_step = np.abs(np.diff(np.asarray(x_coords, dtype=np.float64))).min() + return int(np.ceil(dlon_max / min_step)) + + def _halo_depth(x_coords, y_coords, max_distance, distance_metric): """Overlap depth in pixels for the bounded dask map_overlap call. @@ -302,10 +361,15 @@ def _halo_depth(x_coords, y_coords, max_distance, distance_metric): no halo along that axis (depth 0), so (1, N) and (N, 1) rasters do not crash on the missing second coordinate. - For GREAT_CIRCLE the east-west distance per degree of longitude shrinks - toward the poles, so the column spacing is measured at the highest-latitude - row (largest absolute y) to take the worst case. The north-south distance - does not depend on longitude, so the row spacing uses a fixed longitude. + For GREAT_CIRCLE the column depth comes from the spherical chord bound + in ``_great_circle_col_halo``: longitude is periodic and chords shorten + toward the poles, so a per-column linear step sum is not a valid lower + bound there. When targets across the +/-180 seam are within + ``max_distance``, the returned column depth exceeds the axis length so + ``_fit_halo_to_chunks`` folds the axis. The row depth stays linear: the + great-circle distance between two points is never smaller than their + meridian (north-south) separation, so the per-row step sum is a valid + lower bound for every metric. """ def _min_step_distance(coords, x_ref, y_ref, along): if len(coords) < 2: @@ -322,15 +386,15 @@ def _min_step_distance(coords, x_ref, y_ref, along): smallest = d return smallest - # Worst-case latitude for east-west spacing: the row farthest from the - # equator, where a degree of longitude covers the least ground. - y_worst = max(y_coords, key=abs) - dist_per_row = _min_step_distance(y_coords, x_coords[0], None, "row") - dist_per_col = _min_step_distance(x_coords, None, y_worst, "col") - pad_y = 0 if dist_per_row is None else int(max_distance / dist_per_row + 0.5) - pad_x = 0 if dist_per_col is None else int(max_distance / dist_per_col + 0.5) + + if distance_metric == GREAT_CIRCLE: + pad_x = _great_circle_col_halo(x_coords, y_coords, max_distance) + else: + dist_per_col = _min_step_distance(x_coords, None, y_coords[0], "col") + pad_x = 0 if dist_per_col is None else int( + max_distance / dist_per_col + 0.5) return pad_y, pad_x diff --git a/xrspatial/tests/test_proximity.py b/xrspatial/tests/test_proximity.py index b71959a6d..d3b4acae0 100644 --- a/xrspatial/tests/test_proximity.py +++ b/xrspatial/tests/test_proximity.py @@ -1318,6 +1318,83 @@ def test_great_circle_dask_bounded_matches_numpy(func): ) +# --------------------------------------------------------------------------- +# Regression tests for issue #3108: bounded GREAT_CIRCLE on dask missed +# targets across the +/-180 antimeridian seam. The map_overlap halo is sized +# in array space, but great-circle distance is periodic in longitude, so a +# target near one edge of the array can be the nearest target of a pixel +# near the opposite edge. numpy/cupy (whole-raster brute force) found it; +# dask+numpy/dask+cupy returned NaN. +# --------------------------------------------------------------------------- + +def _antimeridian_raster(): + lon = np.arange(-179.5, 180.0, 1.0) # 360 columns spanning the seam + lat = np.arange(-5.0, 6.0, 1.0) + data = np.zeros((lat.size, lon.size)) + data[5, 0] = 1.0 # target at (lat 0, lon -179.5) + raster = xr.DataArray(data, dims=['lat', 'lon']) + raster['lon'] = lon + raster['lat'] = lat + return raster, data + + +@pytest.mark.skipif(da is None, reason="dask is not installed") +@pytest.mark.parametrize("backend", ['dask+numpy', 'dask+cupy']) +@pytest.mark.parametrize("func", [proximity, allocation, direction]) +def test_great_circle_dask_antimeridian_matches_numpy(backend, func): + """Bounded GREAT_CIRCLE dask sees targets across the +/-180 seam.""" + if has_cuda_and_cupy() is False and 'cupy' in backend: + pytest.skip("Requires CUDA and CuPy") + + raster, data = _antimeridian_raster() + kwargs = dict(x='lon', y='lat', distance_metric='GREAT_CIRCLE', + max_distance=250_000.0) + + numpy_result = func(raster, **kwargs) + # The wrap pixel at lon 179.5 is ~111 km from the target at lon -179.5, + # within max_distance: it must be found, not NaN. + assert np.isfinite(numpy_result.values[5, -1]) + + chunk_data = raster.data + if 'cupy' in backend: + import cupy + chunk_data = cupy.asarray(chunk_data) + dask_raster = raster.copy(data=da.from_array(chunk_data, chunks=(11, 60))) + dask_result = func(dask_raster, **kwargs) + + result_data = dask_result.data.compute() + if 'cupy' in backend: + result_data = result_data.get() + np.testing.assert_allclose( + result_data, numpy_result.values, rtol=1e-5, equal_nan=True, + ) + + +def test_great_circle_halo_folds_on_wrap_and_pole(): + """_halo_depth signals an x-axis fold when the chord bound cannot help.""" + from xrspatial.proximity import GREAT_CIRCLE, _halo_depth + + # Raster spanning the antimeridian: seam gap (1 degree) within reach. + lon = np.arange(-179.5, 180.0, 1.0) + lat = np.arange(-5.0, 6.0, 1.0) + _, pad_x = _halo_depth(lon, lat, 250_000.0, GREAT_CIRCLE) + assert pad_x > len(lon), "wrap-reachable raster must fold the x axis" + + # Pole-adjacent raster: the 180-degree chord at lat 89.75 is ~62 km, + # within max_distance, so no longitude separation excludes a target. + lon = np.arange(0.0, 180.0, 1.0) + lat = np.arange(88.0, 90.0, 0.25) + _, pad_x = _halo_depth(lon, lat, 500_000.0, GREAT_CIRCLE) + assert pad_x > len(lon), "pole-adjacent raster must fold the x axis" + + # Regional raster far from seam and pole: finite halo, no fold. + lon = np.arange(0.0, 10.0, 0.1) + lat = np.arange(40.0, 45.0, 0.1) + pad_y, pad_x = _halo_depth(lon, lat, 50_000.0, GREAT_CIRCLE) + assert 0 < pad_x < len(lon) + assert pad_y > 0 + + # --------------------------------------------------------------------------- # Coverage gaps closed for issue #2692 (test-only; no source change). # From f1e4b5ecd7604f7dbe4b2d3f1f69e92b603d6886 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 9 Jun 2026 16:50:46 -0700 Subject: [PATCH 2/3] Update accuracy sweep state for proximity (#3108) --- .claude/sweep-accuracy-state.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/sweep-accuracy-state.csv b/.claude/sweep-accuracy-state.csv index 974a9bebd..784ce97c2 100644 --- a/.claude/sweep-accuracy-state.csv +++ b/.claude/sweep-accuracy-state.csv @@ -26,7 +26,7 @@ normalize,2026-05-01,,,,rescale and standardize across all 4 backends. NaN/inf f perlin,2026-04-10T12:00:00Z,,,,Improved Perlin noise implementation correct. Fade/gradient functions verified. Backend-consistent. Continuous at cell boundaries. polygon_clip,2026-04-13T12:00:00Z,1197,,,crop=True + all_touched=True drops boundary pixels. Fix in PR #1200. polygonize,2026-05-29,2606,HIGH,5,"Cat 5 HIGH: dask connectivity=8 cross-chunk merge filled diagonal notch where same-value regions meet only at a corner across a chunk boundary; total area exceeded raster. Hole ring was dropped because containment tested hole[0] (on exterior at pinch). Fixed via _ring_interior_point in PR for #2606. numpy, dask+numpy, dask+cupy area parity now holds; 4-conn was already correct. cupy + dask+cupy paths validated on GPU host. Other cats clean: NaN masked on numpy/cupy float paths (tested), _is_close handles +/-inf via exact-equality short-circuit, atol/rtol/simplify_tolerance reject NaN/inf, integer GPU CCL matches numpy." -proximity,2026-05-29,2721,MEDIUM,4;5,Bounded GREAT_CIRCLE on dask (both numpy+cupy) raised ValueError: map_overlap pad depth = max_distance/cellsize mixed metre distance with degree cellsize. numpy/cupy backends fine. Fixed by measuring per-pixel pitch with active metric (PR #2722). Cat1 float32 output is documented design choice; NaN/Inf masking via np.isfinite consistent; numpy GDAL-sweep matches exact nearest and cupy brute-force on tested grids. +proximity,2026-06-09,3108,HIGH,4;5,"Cat5/Cat4: bounded GREAT_CIRCLE dask (numpy+cupy) missed targets across the +/-180 antimeridian seam: _halo_depth sized x-halo as linear parallel-arc sum, but haversine is periodic in lon and chords shorten near poles, so array-space adjacency is no lower bound on spherical distance; numpy/cupy (brute force) found the wrap target (~111 km), dask returned NaN. Fixed in #3108 via chord bound 2R asin(cos(lat_max)|sin(dlon/2)|) + x-axis fold when seam/180-deg chord within max_distance (covers over-pole too). CUDA host: cupy + dask+cupy executed, 417+ tests pass. Cat1-3 clean (float32 output documented; NaN via isfinite consistent; bounds guards correct; tie-break unified in #2881). LOW (not fixed): great_circle_distance uses WGS84 equatorial radius 6378137 as sphere radius (~0.1% vs mean-radius convention) but documented and exposed as param." reproject,2026-05-29,2620,HIGH,5,"Cat5 backend inconsistency: cupy _resample_cupy (cupyx map_coordinates) diverged from numpy/native on pyproj-fallback CRS pairs (projected->projected, e.g. EPSG:32633->3857). Edge-band cval=0.0 bleed (all modes, ~534/pixel) + cubic B-spline vs Catmull-Rom (~0.45 interior). Fixed PR for #2620: route eager+dask cupy through _resample_cupy_native. Other files clean: _merge numpy/cupy structurally identical; _datum_grids/_vertical/_itrf use -0.5 pixel-center interp and self-inequality NaN checks; WGS84/GRS80 constants correct; curvature correction n/a (no geodesic gradient here). LOW (not fixed): _transform._bilinear_interp_2ch docstring claims parallel but isn't." resample,2026-05-29,2610,HIGH,3;5,"dask interp (nearest/bilinear) overlap depth=1 too small on downsample; block-centered source coord landed past chunk, map_coordinates clamped to edge -> wrong seam rows. Fixed PR #2627 via per-axis _downsample_radius. cupy+dask+cupy verified." sieve,2026-04-13T12:00:00Z,,,,Union-find CCL correct. NaN excluded from labeling. All backends funnel through _sieve_numpy. From 90c9cca54b485378bc7545bc79d9d8e78b9b4132 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 9 Jun 2026 16:54:01 -0700 Subject: [PATCH 3/3] Address review: descending-coord halo coverage, drop unused helper return (#3108) --- xrspatial/tests/test_proximity.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/xrspatial/tests/test_proximity.py b/xrspatial/tests/test_proximity.py index d3b4acae0..2f264e6d2 100644 --- a/xrspatial/tests/test_proximity.py +++ b/xrspatial/tests/test_proximity.py @@ -1335,7 +1335,7 @@ def _antimeridian_raster(): raster = xr.DataArray(data, dims=['lat', 'lon']) raster['lon'] = lon raster['lat'] = lat - return raster, data + return raster @pytest.mark.skipif(da is None, reason="dask is not installed") @@ -1346,7 +1346,7 @@ def test_great_circle_dask_antimeridian_matches_numpy(backend, func): if has_cuda_and_cupy() is False and 'cupy' in backend: pytest.skip("Requires CUDA and CuPy") - raster, data = _antimeridian_raster() + raster = _antimeridian_raster() kwargs = dict(x='lon', y='lat', distance_metric='GREAT_CIRCLE', max_distance=250_000.0) @@ -1394,6 +1394,18 @@ def test_great_circle_halo_folds_on_wrap_and_pole(): assert 0 < pad_x < len(lon) assert pad_y > 0 + # Descending coordinates (allowed by the monotonic check) give the same + # halo: span and step are direction-independent. + pad_y_desc, pad_x_desc = _halo_depth( + lon[::-1], lat[::-1], 50_000.0, GREAT_CIRCLE) + assert (pad_y_desc, pad_x_desc) == (pad_y, pad_x) + + # Descending wrap raster still folds. + lon_desc = np.arange(-179.5, 180.0, 1.0)[::-1] + lat_desc = np.arange(-5.0, 6.0, 1.0)[::-1] + _, pad_x = _halo_depth(lon_desc, lat_desc, 250_000.0, GREAT_CIRCLE) + assert pad_x > len(lon_desc) + # --------------------------------------------------------------------------- # Coverage gaps closed for issue #2692 (test-only; no source change).