From 9a5279527f57624518da17659699bf67be434622 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 28 May 2026 08:31:04 -0700 Subject: [PATCH 1/2] reproject: unit-aware bounds_policy='auto' blow-up detection (#2582) The old span-ratio heuristic compared source span in source CRS units (degrees for EPSG:4326) against output span in target CRS units (metres for EPSG:3857). The unit mismatch made the > 50x threshold fire on ordinary geographic-to-projected reprojections, silently trimming 70-106 km per side on a (-10, -10, 10, 10) bbox. Replace it with a unit-agnostic check: ratio of max absolute projected coordinate to median absolute projected coordinate. Benign reprojections stay near 1-2; real singularities push the ratio past 10 (Mercator at the poles after clamp) or higher (polar-stereographic opposite pole projects to ~1e23). Also flag the non-finite fraction of raw, unclamped edge samples so the auto clamp branch does not mask true singularities. Adds four regression tests under TestBoundsPolicy covering the bug case (numpy and dask backends), warning silence on benign input, and that the polar-stereographic pathological case still trips. --- CHANGELOG.md | 10 +++ docs/source/reference/reproject.rst | 8 +- xrspatial/reproject/_grid.py | 56 ++++++++++--- xrspatial/tests/test_reproject.py | 125 ++++++++++++++++++++++++++++ 4 files changed, 185 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 362705bd3..35a47168e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,16 @@ #### Fixed +- `reproject(..., bounds_policy="auto")` no longer crops valid edge data + on ordinary geographic-to-projected reprojections. The old blow-up + heuristic compared source span (e.g. degrees for EPSG:4326) against + target span (e.g. metres for EPSG:3857) and tripped on almost any + geographic-to-projected pair. The new heuristic is unit-agnostic: + it compares the max absolute projected coordinate to the median + and also checks the non-finite fraction of raw edge samples in the + target CRS. Benign reprojections stay untouched; real singularities + (Mercator at the poles, polar-stereographic on the opposite pole) + still trigger the percentile fallback. (#2582) - `polygonize` now auto-detects the raster's affine transform from `attrs['transform']` (xrspatial.geotiff convention) or `rio.transform()` (rioxarray) when the caller did not pass an diff --git a/docs/source/reference/reproject.rst b/docs/source/reference/reproject.rst index 1d2932431..ab7d6d724 100644 --- a/docs/source/reference/reproject.rst +++ b/docs/source/reference/reproject.rst @@ -26,9 +26,11 @@ The available options are: - ``"auto"`` (default): clamp geographic source bounds inward by 0.01 deg from +/-180 longitude and +/-90 latitude, and fall back to the 2nd/98th percentile of a dense interior grid when the projected - extent is more than 50x the source extent. Matches the historical - behaviour. Emits a ``UserWarning`` when the clamp or percentile - fallback actually alters the bounds. + extent shows a real singularity blow-up. Blow-up detection is + unit-agnostic: it compares the max absolute projected coordinate + to the median and also flags non-finite raw edge projections. + Emits a ``UserWarning`` when the clamp or percentile fallback + actually alters the bounds. - ``"raw"``: use the true projected extent of the source corners and edges. No clamp, no percentile. Pass this when downstream tooling needs the exact projection of the input footprint and you are willing diff --git a/xrspatial/reproject/_grid.py b/xrspatial/reproject/_grid.py index 312db7126..88e2f43ef 100644 --- a/xrspatial/reproject/_grid.py +++ b/xrspatial/reproject/_grid.py @@ -252,17 +252,51 @@ def _compute_output_grid(source_bounds, source_shape, source_crs, target_crs, # interior grid instead of just boundary points. raw_bounds = (float(tx.min()), float(ty.min()), float(tx.max()), float(ty.max())) - src_w_crs = src_right - src_left - src_h_crs = src_top - src_bottom - out_w_crs = raw_bounds[2] - raw_bounds[0] - out_h_crs = raw_bounds[3] - raw_bounds[1] - - # Heuristic: if output span is > 50x source span in either axis, - # boundary points likely wrapped around a singularity. Fall back - # to a dense interior grid to get a tighter bounding box. - ratio_x = out_w_crs / (abs(src_w_crs) + 1e-30) - ratio_y = out_h_crs / (abs(src_h_crs) + 1e-30) - auto_blowup = (ratio_x > 50 or ratio_y > 50) + + # Unit-agnostic blow-up detection. Comparing source span (in + # source CRS units) to output span (in target CRS units) is a + # unit mismatch when geographic source projects to a metric + # target, so the previous span-ratio heuristic tripped on + # ordinary EPSG:4326 -> EPSG:3857/UTM cases. Instead, compare + # the maximum absolute projected coordinate to the median. + # Benign reprojections stay near 1x; real singularities push a + # handful of samples far past the bulk of the points (Mercator + # near the poles, polar-stereographic on the opposite pole). + abs_coords = np.concatenate([np.abs(tx), np.abs(ty)]) + median_abs = float(np.median(abs_coords)) + max_abs = float(abs_coords.max()) + # Also flag inf/nan from the *unclamped* edge transform, since + # the clamp branch above hides true singularities under "auto" + # by trimming source coordinates first. + nonfinite_frac = 0.0 + if bounds_policy == "auto" and source_crs.is_geographic: + raw_edge_xs = np.concatenate([ + np.linspace(src_left_raw, src_right_raw, n_edge), + np.linspace(src_left_raw, src_right_raw, n_edge), + np.full(n_edge, src_left_raw), + np.full(n_edge, src_right_raw), + ]) + raw_edge_ys = np.concatenate([ + np.full(n_edge, src_top_raw), + np.full(n_edge, src_bottom_raw), + np.linspace(src_bottom_raw, src_top_raw, n_edge), + np.linspace(src_bottom_raw, src_top_raw, n_edge), + ]) + rtx, rty = _transform_boundary( + source_crs, target_crs, raw_edge_xs, raw_edge_ys, + ) + rtx = np.asarray(rtx) + rty = np.asarray(rty) + nonfinite_frac = float( + (~(np.isfinite(rtx) & np.isfinite(rty))).mean() + ) + # Magnitude blow-up threshold: 10x covers the polar-stereographic + # opposite-pole case (ratio ~1e16) while leaving headroom above + # the typical benign ratio (~1-3). + magnitude_blowup = ( + median_abs > 0 and max_abs / median_abs > 10.0 + ) + auto_blowup = magnitude_blowup or nonfinite_frac > 0.05 use_percentile = ( bounds_policy == "percentile" or (bounds_policy == "auto" and auto_blowup) diff --git a/xrspatial/tests/test_reproject.py b/xrspatial/tests/test_reproject.py index 4c2bd914e..2ab561f52 100644 --- a/xrspatial/tests/test_reproject.py +++ b/xrspatial/tests/test_reproject.py @@ -5200,6 +5200,131 @@ def test_merge_dedupes_per_input_warnings(self): f"{[str(m.message) for m in matched]}" ) + # -- Issue #2582: unit-aware blow-up heuristic -------------------- + + def test_auto_does_not_crop_benign_geographic_to_mercator(self): + """Regression for #2582. + + Reprojecting a small EPSG:4326 bbox to EPSG:3857 under + bounds_policy='auto' must match bounds_policy='raw' to a + small tolerance. The old span-ratio heuristic compared + degrees to metres and always tripped on geographic-to- + projected pairs, silently trimming tens of km per side. + """ + from xrspatial.reproject import reproject + + data = np.random.RandomState(0).rand(64, 64).astype(np.float32) + r = xr.DataArray( + data, dims=['y', 'x'], + coords={'y': np.linspace(10, -10, 64), + 'x': np.linspace(-10, 10, 64)}, + attrs={'crs': 'EPSG:4326'}, + ) + import warnings + with warnings.catch_warnings(): + warnings.simplefilter('ignore', UserWarning) + auto_r = reproject(r, 'EPSG:3857', bounds_policy='auto') + raw_r = reproject(r, 'EPSG:3857', bounds_policy='raw') + + # Spans should match within one output pixel of res. + ax = auto_r.coords['x'].values + rx = raw_r.coords['x'].values + ay = auto_r.coords['y'].values + ry = raw_r.coords['y'].values + + res_x = (rx.max() - rx.min()) / max(1, len(rx) - 1) + res_y = (ry.max() - ry.min()) / max(1, len(ry) - 1) + + # No more than one pixel of crop on each side (was ~106 km / ~70 km). + assert abs((ax.max() - ax.min()) - (rx.max() - rx.min())) < 2 * res_x + assert abs((ay.max() - ay.min()) - (ry.max() - ry.min())) < 2 * res_y + + def test_auto_silent_on_benign_geographic_to_mercator(self): + """No bounds_policy warning fires for a benign 4326->3857 case (#2582).""" + from xrspatial.reproject import reproject + + data = np.random.RandomState(0).rand(32, 32).astype(np.float32) + r = xr.DataArray( + data, dims=['y', 'x'], + coords={'y': np.linspace(10, -10, 32), + 'x': np.linspace(-10, 10, 32)}, + attrs={'crs': 'EPSG:4326'}, + ) + import warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + reproject(r, 'EPSG:3857', bounds_policy='auto') + matched = [ + wi for wi in w + if issubclass(wi.category, UserWarning) + and 'bounds_policy' in str(wi.message) + ] + assert not matched, ( + f"unexpected bounds_policy warning(s) on benign 4326->3857 " + f"input: {[str(m.message) for m in matched]}" + ) + + def test_auto_still_trips_polar_stereographic_blowup(self): + """Pathological 4326->polar-stereo case must still trip auto (#2582). + + A global EPSG:4326 raster projected to EPSG:3413 (NSIDC north + polar stereographic) produces finite-but-astronomical + coordinates near the south pole. The new unit-agnostic + heuristic must still catch this and apply the percentile + fallback. + """ + from xrspatial.reproject._grid import _compute_output_grid + from xrspatial.reproject._crs_utils import _resolve_crs + + src_crs = _resolve_crs('EPSG:4326') + tgt_crs = _resolve_crs('EPSG:3413') + + import warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + grid = _compute_output_grid( + (-180.0, -90.0, 180.0, 90.0), (100, 200), + src_crs, tgt_crs, bounds_policy='auto', + ) + # The auto bounds should be reasonable Earth-scale (well under + # 1e10 m), not the 1e23-scale raw projection. + left, bottom, right, top = grid['bounds'] + assert abs(left) < 1e10 and abs(right) < 1e10 + assert abs(bottom) < 1e10 and abs(top) < 1e10 + # And the warning must fire. + matched = [ + wi for wi in w + if issubclass(wi.category, UserWarning) + and 'bounds_policy' in str(wi.message) + and ('blow-up' in str(wi.message) or 'percentile' in str(wi.message)) + ] + assert matched, ( + "expected a blow-up/percentile warning under auto for " + "global 4326 -> polar stereographic" + ) + + @pytest.mark.skipif(not HAS_DASK, reason="dask required") + def test_auto_does_not_crop_benign_geographic_dask(self): + """Dask-backed input also gets the unit-aware fix (#2582).""" + from xrspatial.reproject import reproject + + data = np.random.RandomState(0).rand(64, 64).astype(np.float32) + r = xr.DataArray( + data, dims=['y', 'x'], + coords={'y': np.linspace(10, -10, 64), + 'x': np.linspace(-10, 10, 64)}, + attrs={'crs': 'EPSG:4326'}, + ).chunk({'y': 32, 'x': 32}) + import warnings + with warnings.catch_warnings(): + warnings.simplefilter('ignore', UserWarning) + auto_r = reproject(r, 'EPSG:3857', bounds_policy='auto') + raw_r = reproject(r, 'EPSG:3857', bounds_policy='raw') + ax = auto_r.coords['x'].values + rx = raw_r.coords['x'].values + res_x = (rx.max() - rx.min()) / max(1, len(rx) - 1) + assert abs((ax.max() - ax.min()) - (rx.max() - rx.min())) < 2 * res_x + # --------------------------------------------------------------------------- # Integer dtype nodata handling (#2185) From 1d371062b2e9759887102445f727fb74b4bcc0f1 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 28 May 2026 08:35:15 -0700 Subject: [PATCH 2/2] Address review nits: factor edge-sampling helper, clarify thresholds (#2582) - Extract _edge_samples() helper. The non-finite probe was duplicating the same 4-edge sampling pattern as the main edge+interior sampler; consolidating keeps the two callers in sync if n_edge is ever tuned. - Add a comment explaining why the auto_blowup signal combines two heuristics and why the magnitude case alone is insufficient (global 4326 -> 3857 has ratio ~5 after the clamp, well under the 10x threshold; the non-finite probe is what trips that case). - Add a comment explaining the 5% non-finite threshold: benign cases yield 0%, real singularities yield 15%+, the floor tolerates one or two stray inf points from numerical noise. --- xrspatial/reproject/_grid.py | 96 +++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 35 deletions(-) diff --git a/xrspatial/reproject/_grid.py b/xrspatial/reproject/_grid.py index 88e2f43ef..16149c476 100644 --- a/xrspatial/reproject/_grid.py +++ b/xrspatial/reproject/_grid.py @@ -112,6 +112,39 @@ def _validate_grid_params(*, resolution, bounds, width, height, ) +def _edge_samples(left, bottom, right, top, n_edge): + """Build edge-sample x/y arrays for a 2D bounding box. + + Produces ``4 * n_edge`` (x, y) pairs by sampling the four edges + of the bbox at ``n_edge`` evenly spaced points each. + + Parameters + ---------- + left, bottom, right, top : float + Bounding box in source CRS. + n_edge : int + Sample count per edge. + + Returns + ------- + xs, ys : ndarray + 1-D coordinate arrays of length ``4 * n_edge``. + """ + xs = np.concatenate([ + np.linspace(left, right, n_edge), # top edge + np.linspace(left, right, n_edge), # bottom edge + np.full(n_edge, left), # left edge + np.full(n_edge, right), # right edge + ]) + ys = np.concatenate([ + np.full(n_edge, top), + np.full(n_edge, bottom), + np.linspace(bottom, top, n_edge), + np.linspace(bottom, top, n_edge), + ]) + return xs, ys + + def _transform_boundary(source_crs, target_crs, xs, ys): """Transform coordinate arrays, preferring Numba fast path over pyproj. @@ -212,18 +245,9 @@ def _compute_output_grid(source_bounds, source_shape, source_crs, target_crs, # don't underestimate the output bounding box. n_edge = 101 n_interior = 21 - edge_xs = np.concatenate([ - np.linspace(src_left, src_right, n_edge), # top edge - np.linspace(src_left, src_right, n_edge), # bottom edge - np.full(n_edge, src_left), # left edge - np.full(n_edge, src_right), # right edge - ]) - edge_ys = np.concatenate([ - np.full(n_edge, src_top), - np.full(n_edge, src_bottom), - np.linspace(src_bottom, src_top, n_edge), - np.linspace(src_bottom, src_top, n_edge), - ]) + edge_xs, edge_ys = _edge_samples( + src_left, src_bottom, src_right, src_top, n_edge, + ) # Interior grid catches cases where the projected extent # bulges beyond the edges (e.g. Mercator near the poles). ix = np.linspace(src_left, src_right, n_interior) @@ -257,31 +281,29 @@ def _compute_output_grid(source_bounds, source_shape, source_crs, target_crs, # source CRS units) to output span (in target CRS units) is a # unit mismatch when geographic source projects to a metric # target, so the previous span-ratio heuristic tripped on - # ordinary EPSG:4326 -> EPSG:3857/UTM cases. Instead, compare - # the maximum absolute projected coordinate to the median. - # Benign reprojections stay near 1x; real singularities push a - # handful of samples far past the bulk of the points (Mercator - # near the poles, polar-stereographic on the opposite pole). + # ordinary EPSG:4326 -> EPSG:3857/UTM cases. Two unit-agnostic + # signals replace it: + # + # 1. Magnitude ratio: max absolute projected coordinate vs + # median. Benign reprojections stay near 1x. Polar- + # stereographic on the opposite pole projects to ~1e23, + # giving a ratio of ~1e16 (well above the threshold). + # 2. Non-finite fraction of *unclamped* raw edge samples. + # Catches Mercator at the poles, where the clamp branch + # above pulls the source bounds in by 0.01 deg and hides + # the inf result that would otherwise reveal the + # singularity. After clamp the magnitude ratio for global + # Mercator drops to ~5, so the non-finite signal is what + # actually trips that case. abs_coords = np.concatenate([np.abs(tx), np.abs(ty)]) median_abs = float(np.median(abs_coords)) max_abs = float(abs_coords.max()) - # Also flag inf/nan from the *unclamped* edge transform, since - # the clamp branch above hides true singularities under "auto" - # by trimming source coordinates first. nonfinite_frac = 0.0 if bounds_policy == "auto" and source_crs.is_geographic: - raw_edge_xs = np.concatenate([ - np.linspace(src_left_raw, src_right_raw, n_edge), - np.linspace(src_left_raw, src_right_raw, n_edge), - np.full(n_edge, src_left_raw), - np.full(n_edge, src_right_raw), - ]) - raw_edge_ys = np.concatenate([ - np.full(n_edge, src_top_raw), - np.full(n_edge, src_bottom_raw), - np.linspace(src_bottom_raw, src_top_raw, n_edge), - np.linspace(src_bottom_raw, src_top_raw, n_edge), - ]) + raw_edge_xs, raw_edge_ys = _edge_samples( + src_left_raw, src_bottom_raw, + src_right_raw, src_top_raw, n_edge, + ) rtx, rty = _transform_boundary( source_crs, target_crs, raw_edge_xs, raw_edge_ys, ) @@ -290,12 +312,16 @@ def _compute_output_grid(source_bounds, source_shape, source_crs, target_crs, nonfinite_frac = float( (~(np.isfinite(rtx) & np.isfinite(rty))).mean() ) - # Magnitude blow-up threshold: 10x covers the polar-stereographic - # opposite-pole case (ratio ~1e16) while leaving headroom above - # the typical benign ratio (~1-3). + # 10x magnitude threshold leaves headroom above the typical + # benign ratio (~1-3) while still tripping the astronomically + # large polar-stereographic blow-up. magnitude_blowup = ( median_abs > 0 and max_abs / median_abs > 10.0 ) + # 5% non-finite frac: benign cases yield 0%, edge cases that + # genuinely touch a singularity yield 15% or more (the global + # 4326 -> 3857 probe yields ~25%). The 5% floor tolerates + # one or two stray inf points from numerical noise. auto_blowup = magnitude_blowup or nonfinite_frac > 0.05 use_percentile = ( bounds_policy == "percentile"