diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e4b25c6e..68d54f0be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,16 @@ #### Fixed +- `reproject(..., bounds_policy="auto")` no longer crops valid edge data + on ordinary geographic-to-projected reprojections. The old blow-up + heuristic compared source span (e.g. degrees for EPSG:4326) against + target span (e.g. metres for EPSG:3857) and tripped on almost any + geographic-to-projected pair. The new heuristic is unit-agnostic: + it compares the max absolute projected coordinate to the median + and also checks the non-finite fraction of raw edge samples in the + target CRS. Benign reprojections stay untouched; real singularities + (Mercator at the poles, polar-stereographic on the opposite pole) + still trigger the percentile fallback. (#2582) - `zonal_stats` now validates `return_type` at entry. Previously any string other than `'pandas.DataFrame'` or `'xarray.DataArray'` fell through to an internal branch that returned a raw `numpy.ndarray`, diff --git a/docs/source/reference/reproject.rst b/docs/source/reference/reproject.rst index 1d2932431..ab7d6d724 100644 --- a/docs/source/reference/reproject.rst +++ b/docs/source/reference/reproject.rst @@ -26,9 +26,11 @@ The available options are: - ``"auto"`` (default): clamp geographic source bounds inward by 0.01 deg from +/-180 longitude and +/-90 latitude, and fall back to the 2nd/98th percentile of a dense interior grid when the projected - extent is more than 50x the source extent. Matches the historical - behaviour. Emits a ``UserWarning`` when the clamp or percentile - fallback actually alters the bounds. + extent shows a real singularity blow-up. Blow-up detection is + unit-agnostic: it compares the max absolute projected coordinate + to the median and also flags non-finite raw edge projections. + Emits a ``UserWarning`` when the clamp or percentile fallback + actually alters the bounds. - ``"raw"``: use the true projected extent of the source corners and edges. No clamp, no percentile. Pass this when downstream tooling needs the exact projection of the input footprint and you are willing diff --git a/xrspatial/reproject/_grid.py b/xrspatial/reproject/_grid.py index 312db7126..16149c476 100644 --- a/xrspatial/reproject/_grid.py +++ b/xrspatial/reproject/_grid.py @@ -112,6 +112,39 @@ def _validate_grid_params(*, resolution, bounds, width, height, ) +def _edge_samples(left, bottom, right, top, n_edge): + """Build edge-sample x/y arrays for a 2D bounding box. + + Produces ``4 * n_edge`` (x, y) pairs by sampling the four edges + of the bbox at ``n_edge`` evenly spaced points each. + + Parameters + ---------- + left, bottom, right, top : float + Bounding box in source CRS. + n_edge : int + Sample count per edge. + + Returns + ------- + xs, ys : ndarray + 1-D coordinate arrays of length ``4 * n_edge``. + """ + xs = np.concatenate([ + np.linspace(left, right, n_edge), # top edge + np.linspace(left, right, n_edge), # bottom edge + np.full(n_edge, left), # left edge + np.full(n_edge, right), # right edge + ]) + ys = np.concatenate([ + np.full(n_edge, top), + np.full(n_edge, bottom), + np.linspace(bottom, top, n_edge), + np.linspace(bottom, top, n_edge), + ]) + return xs, ys + + def _transform_boundary(source_crs, target_crs, xs, ys): """Transform coordinate arrays, preferring Numba fast path over pyproj. @@ -212,18 +245,9 @@ def _compute_output_grid(source_bounds, source_shape, source_crs, target_crs, # don't underestimate the output bounding box. n_edge = 101 n_interior = 21 - edge_xs = np.concatenate([ - np.linspace(src_left, src_right, n_edge), # top edge - np.linspace(src_left, src_right, n_edge), # bottom edge - np.full(n_edge, src_left), # left edge - np.full(n_edge, src_right), # right edge - ]) - edge_ys = np.concatenate([ - np.full(n_edge, src_top), - np.full(n_edge, src_bottom), - np.linspace(src_bottom, src_top, n_edge), - np.linspace(src_bottom, src_top, n_edge), - ]) + edge_xs, edge_ys = _edge_samples( + src_left, src_bottom, src_right, src_top, n_edge, + ) # Interior grid catches cases where the projected extent # bulges beyond the edges (e.g. Mercator near the poles). ix = np.linspace(src_left, src_right, n_interior) @@ -252,17 +276,53 @@ def _compute_output_grid(source_bounds, source_shape, source_crs, target_crs, # interior grid instead of just boundary points. raw_bounds = (float(tx.min()), float(ty.min()), float(tx.max()), float(ty.max())) - src_w_crs = src_right - src_left - src_h_crs = src_top - src_bottom - out_w_crs = raw_bounds[2] - raw_bounds[0] - out_h_crs = raw_bounds[3] - raw_bounds[1] - - # Heuristic: if output span is > 50x source span in either axis, - # boundary points likely wrapped around a singularity. Fall back - # to a dense interior grid to get a tighter bounding box. - ratio_x = out_w_crs / (abs(src_w_crs) + 1e-30) - ratio_y = out_h_crs / (abs(src_h_crs) + 1e-30) - auto_blowup = (ratio_x > 50 or ratio_y > 50) + + # Unit-agnostic blow-up detection. Comparing source span (in + # source CRS units) to output span (in target CRS units) is a + # unit mismatch when geographic source projects to a metric + # target, so the previous span-ratio heuristic tripped on + # ordinary EPSG:4326 -> EPSG:3857/UTM cases. Two unit-agnostic + # signals replace it: + # + # 1. Magnitude ratio: max absolute projected coordinate vs + # median. Benign reprojections stay near 1x. Polar- + # stereographic on the opposite pole projects to ~1e23, + # giving a ratio of ~1e16 (well above the threshold). + # 2. Non-finite fraction of *unclamped* raw edge samples. + # Catches Mercator at the poles, where the clamp branch + # above pulls the source bounds in by 0.01 deg and hides + # the inf result that would otherwise reveal the + # singularity. After clamp the magnitude ratio for global + # Mercator drops to ~5, so the non-finite signal is what + # actually trips that case. + abs_coords = np.concatenate([np.abs(tx), np.abs(ty)]) + median_abs = float(np.median(abs_coords)) + max_abs = float(abs_coords.max()) + nonfinite_frac = 0.0 + if bounds_policy == "auto" and source_crs.is_geographic: + raw_edge_xs, raw_edge_ys = _edge_samples( + src_left_raw, src_bottom_raw, + src_right_raw, src_top_raw, n_edge, + ) + rtx, rty = _transform_boundary( + source_crs, target_crs, raw_edge_xs, raw_edge_ys, + ) + rtx = np.asarray(rtx) + rty = np.asarray(rty) + nonfinite_frac = float( + (~(np.isfinite(rtx) & np.isfinite(rty))).mean() + ) + # 10x magnitude threshold leaves headroom above the typical + # benign ratio (~1-3) while still tripping the astronomically + # large polar-stereographic blow-up. + magnitude_blowup = ( + median_abs > 0 and max_abs / median_abs > 10.0 + ) + # 5% non-finite frac: benign cases yield 0%, edge cases that + # genuinely touch a singularity yield 15% or more (the global + # 4326 -> 3857 probe yields ~25%). The 5% floor tolerates + # one or two stray inf points from numerical noise. + auto_blowup = magnitude_blowup or nonfinite_frac > 0.05 use_percentile = ( bounds_policy == "percentile" or (bounds_policy == "auto" and auto_blowup) diff --git a/xrspatial/tests/test_reproject.py b/xrspatial/tests/test_reproject.py index 34409f699..4eb483d2f 100644 --- a/xrspatial/tests/test_reproject.py +++ b/xrspatial/tests/test_reproject.py @@ -5260,6 +5260,131 @@ def test_merge_dedupes_per_input_warnings(self): f"{[str(m.message) for m in matched]}" ) + # -- Issue #2582: unit-aware blow-up heuristic -------------------- + + def test_auto_does_not_crop_benign_geographic_to_mercator(self): + """Regression for #2582. + + Reprojecting a small EPSG:4326 bbox to EPSG:3857 under + bounds_policy='auto' must match bounds_policy='raw' to a + small tolerance. The old span-ratio heuristic compared + degrees to metres and always tripped on geographic-to- + projected pairs, silently trimming tens of km per side. + """ + from xrspatial.reproject import reproject + + data = np.random.RandomState(0).rand(64, 64).astype(np.float32) + r = xr.DataArray( + data, dims=['y', 'x'], + coords={'y': np.linspace(10, -10, 64), + 'x': np.linspace(-10, 10, 64)}, + attrs={'crs': 'EPSG:4326'}, + ) + import warnings + with warnings.catch_warnings(): + warnings.simplefilter('ignore', UserWarning) + auto_r = reproject(r, 'EPSG:3857', bounds_policy='auto') + raw_r = reproject(r, 'EPSG:3857', bounds_policy='raw') + + # Spans should match within one output pixel of res. + ax = auto_r.coords['x'].values + rx = raw_r.coords['x'].values + ay = auto_r.coords['y'].values + ry = raw_r.coords['y'].values + + res_x = (rx.max() - rx.min()) / max(1, len(rx) - 1) + res_y = (ry.max() - ry.min()) / max(1, len(ry) - 1) + + # No more than one pixel of crop on each side (was ~106 km / ~70 km). + assert abs((ax.max() - ax.min()) - (rx.max() - rx.min())) < 2 * res_x + assert abs((ay.max() - ay.min()) - (ry.max() - ry.min())) < 2 * res_y + + def test_auto_silent_on_benign_geographic_to_mercator(self): + """No bounds_policy warning fires for a benign 4326->3857 case (#2582).""" + from xrspatial.reproject import reproject + + data = np.random.RandomState(0).rand(32, 32).astype(np.float32) + r = xr.DataArray( + data, dims=['y', 'x'], + coords={'y': np.linspace(10, -10, 32), + 'x': np.linspace(-10, 10, 32)}, + attrs={'crs': 'EPSG:4326'}, + ) + import warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + reproject(r, 'EPSG:3857', bounds_policy='auto') + matched = [ + wi for wi in w + if issubclass(wi.category, UserWarning) + and 'bounds_policy' in str(wi.message) + ] + assert not matched, ( + f"unexpected bounds_policy warning(s) on benign 4326->3857 " + f"input: {[str(m.message) for m in matched]}" + ) + + def test_auto_still_trips_polar_stereographic_blowup(self): + """Pathological 4326->polar-stereo case must still trip auto (#2582). + + A global EPSG:4326 raster projected to EPSG:3413 (NSIDC north + polar stereographic) produces finite-but-astronomical + coordinates near the south pole. The new unit-agnostic + heuristic must still catch this and apply the percentile + fallback. + """ + from xrspatial.reproject._grid import _compute_output_grid + from xrspatial.reproject._crs_utils import _resolve_crs + + src_crs = _resolve_crs('EPSG:4326') + tgt_crs = _resolve_crs('EPSG:3413') + + import warnings + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + grid = _compute_output_grid( + (-180.0, -90.0, 180.0, 90.0), (100, 200), + src_crs, tgt_crs, bounds_policy='auto', + ) + # The auto bounds should be reasonable Earth-scale (well under + # 1e10 m), not the 1e23-scale raw projection. + left, bottom, right, top = grid['bounds'] + assert abs(left) < 1e10 and abs(right) < 1e10 + assert abs(bottom) < 1e10 and abs(top) < 1e10 + # And the warning must fire. + matched = [ + wi for wi in w + if issubclass(wi.category, UserWarning) + and 'bounds_policy' in str(wi.message) + and ('blow-up' in str(wi.message) or 'percentile' in str(wi.message)) + ] + assert matched, ( + "expected a blow-up/percentile warning under auto for " + "global 4326 -> polar stereographic" + ) + + @pytest.mark.skipif(not HAS_DASK, reason="dask required") + def test_auto_does_not_crop_benign_geographic_dask(self): + """Dask-backed input also gets the unit-aware fix (#2582).""" + from xrspatial.reproject import reproject + + data = np.random.RandomState(0).rand(64, 64).astype(np.float32) + r = xr.DataArray( + data, dims=['y', 'x'], + coords={'y': np.linspace(10, -10, 64), + 'x': np.linspace(-10, 10, 64)}, + attrs={'crs': 'EPSG:4326'}, + ).chunk({'y': 32, 'x': 32}) + import warnings + with warnings.catch_warnings(): + warnings.simplefilter('ignore', UserWarning) + auto_r = reproject(r, 'EPSG:3857', bounds_policy='auto') + raw_r = reproject(r, 'EPSG:3857', bounds_policy='raw') + ax = auto_r.coords['x'].values + rx = raw_r.coords['x'].values + res_x = (rx.max() - rx.min()) / max(1, len(rx) - 1) + assert abs((ax.max() - ax.min()) - (rx.max() - rx.min())) < 2 * res_x + # --------------------------------------------------------------------------- # Integer dtype nodata handling (#2185)