Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@

#### Fixed

- `reproject(..., bounds_policy="auto")` no longer crops valid edge data
on ordinary geographic-to-projected reprojections. The old blow-up
heuristic compared source span (e.g. degrees for EPSG:4326) against
target span (e.g. metres for EPSG:3857) and tripped on almost any
geographic-to-projected pair. The new heuristic is unit-agnostic:
it compares the max absolute projected coordinate to the median
and also checks the non-finite fraction of raw edge samples in the
target CRS. Benign reprojections stay untouched; real singularities
(Mercator at the poles, polar-stereographic on the opposite pole)
still trigger the percentile fallback. (#2582)
- `zonal_stats` now validates `return_type` at entry. Previously any
string other than `'pandas.DataFrame'` or `'xarray.DataArray'` fell
through to an internal branch that returned a raw `numpy.ndarray`,
Expand Down
8 changes: 5 additions & 3 deletions docs/source/reference/reproject.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@ The available options are:
- ``"auto"`` (default): clamp geographic source bounds inward by
0.01 deg from +/-180 longitude and +/-90 latitude, and fall back to
the 2nd/98th percentile of a dense interior grid when the projected
extent is more than 50x the source extent. Matches the historical
behaviour. Emits a ``UserWarning`` when the clamp or percentile
fallback actually alters the bounds.
extent shows a real singularity blow-up. Blow-up detection is
unit-agnostic: it compares the max absolute projected coordinate
to the median and also flags non-finite raw edge projections.
Emits a ``UserWarning`` when the clamp or percentile fallback
actually alters the bounds.
- ``"raw"``: use the true projected extent of the source corners and
edges. No clamp, no percentile. Pass this when downstream tooling
needs the exact projection of the input footprint and you are willing
Expand Down
106 changes: 83 additions & 23 deletions xrspatial/reproject/_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,39 @@ def _validate_grid_params(*, resolution, bounds, width, height,
)


def _edge_samples(left, bottom, right, top, n_edge):
"""Build edge-sample x/y arrays for a 2D bounding box.

Produces ``4 * n_edge`` (x, y) pairs by sampling the four edges
of the bbox at ``n_edge`` evenly spaced points each.

Parameters
----------
left, bottom, right, top : float
Bounding box in source CRS.
n_edge : int
Sample count per edge.

Returns
-------
xs, ys : ndarray
1-D coordinate arrays of length ``4 * n_edge``.
"""
xs = np.concatenate([
np.linspace(left, right, n_edge), # top edge
np.linspace(left, right, n_edge), # bottom edge
np.full(n_edge, left), # left edge
np.full(n_edge, right), # right edge
])
ys = np.concatenate([
np.full(n_edge, top),
np.full(n_edge, bottom),
np.linspace(bottom, top, n_edge),
np.linspace(bottom, top, n_edge),
])
return xs, ys


def _transform_boundary(source_crs, target_crs, xs, ys):
"""Transform coordinate arrays, preferring Numba fast path over pyproj.

Expand Down Expand Up @@ -212,18 +245,9 @@ def _compute_output_grid(source_bounds, source_shape, source_crs, target_crs,
# don't underestimate the output bounding box.
n_edge = 101
n_interior = 21
edge_xs = np.concatenate([
np.linspace(src_left, src_right, n_edge), # top edge
np.linspace(src_left, src_right, n_edge), # bottom edge
np.full(n_edge, src_left), # left edge
np.full(n_edge, src_right), # right edge
])
edge_ys = np.concatenate([
np.full(n_edge, src_top),
np.full(n_edge, src_bottom),
np.linspace(src_bottom, src_top, n_edge),
np.linspace(src_bottom, src_top, n_edge),
])
edge_xs, edge_ys = _edge_samples(
src_left, src_bottom, src_right, src_top, n_edge,
)
# Interior grid catches cases where the projected extent
# bulges beyond the edges (e.g. Mercator near the poles).
ix = np.linspace(src_left, src_right, n_interior)
Expand Down Expand Up @@ -252,17 +276,53 @@ def _compute_output_grid(source_bounds, source_shape, source_crs, target_crs,
# interior grid instead of just boundary points.
raw_bounds = (float(tx.min()), float(ty.min()),
float(tx.max()), float(ty.max()))
src_w_crs = src_right - src_left
src_h_crs = src_top - src_bottom
out_w_crs = raw_bounds[2] - raw_bounds[0]
out_h_crs = raw_bounds[3] - raw_bounds[1]

# Heuristic: if output span is > 50x source span in either axis,
# boundary points likely wrapped around a singularity. Fall back
# to a dense interior grid to get a tighter bounding box.
ratio_x = out_w_crs / (abs(src_w_crs) + 1e-30)
ratio_y = out_h_crs / (abs(src_h_crs) + 1e-30)
auto_blowup = (ratio_x > 50 or ratio_y > 50)

# Unit-agnostic blow-up detection. Comparing source span (in
# source CRS units) to output span (in target CRS units) is a
# unit mismatch when geographic source projects to a metric
# target, so the previous span-ratio heuristic tripped on
# ordinary EPSG:4326 -> EPSG:3857/UTM cases. Two unit-agnostic
# signals replace it:
#
# 1. Magnitude ratio: max absolute projected coordinate vs
# median. Benign reprojections stay near 1x. Polar-
# stereographic on the opposite pole projects to ~1e23,
# giving a ratio of ~1e16 (well above the threshold).
# 2. Non-finite fraction of *unclamped* raw edge samples.
# Catches Mercator at the poles, where the clamp branch
# above pulls the source bounds in by 0.01 deg and hides
# the inf result that would otherwise reveal the
# singularity. After clamp the magnitude ratio for global
# Mercator drops to ~5, so the non-finite signal is what
# actually trips that case.
abs_coords = np.concatenate([np.abs(tx), np.abs(ty)])
median_abs = float(np.median(abs_coords))
max_abs = float(abs_coords.max())
nonfinite_frac = 0.0
if bounds_policy == "auto" and source_crs.is_geographic:
raw_edge_xs, raw_edge_ys = _edge_samples(
src_left_raw, src_bottom_raw,
src_right_raw, src_top_raw, n_edge,
)
rtx, rty = _transform_boundary(
source_crs, target_crs, raw_edge_xs, raw_edge_ys,
)
rtx = np.asarray(rtx)
rty = np.asarray(rty)
nonfinite_frac = float(
(~(np.isfinite(rtx) & np.isfinite(rty))).mean()
)
# 10x magnitude threshold leaves headroom above the typical
# benign ratio (~1-3) while still tripping the astronomically
# large polar-stereographic blow-up.
magnitude_blowup = (
median_abs > 0 and max_abs / median_abs > 10.0
)
# 5% non-finite frac: benign cases yield 0%, edge cases that
# genuinely touch a singularity yield 15% or more (the global
# 4326 -> 3857 probe yields ~25%). The 5% floor tolerates
# one or two stray inf points from numerical noise.
auto_blowup = magnitude_blowup or nonfinite_frac > 0.05
use_percentile = (
bounds_policy == "percentile"
or (bounds_policy == "auto" and auto_blowup)
Expand Down
125 changes: 125 additions & 0 deletions xrspatial/tests/test_reproject.py
Original file line number Diff line number Diff line change
Expand Up @@ -5260,6 +5260,131 @@ def test_merge_dedupes_per_input_warnings(self):
f"{[str(m.message) for m in matched]}"
)

# -- Issue #2582: unit-aware blow-up heuristic --------------------

def test_auto_does_not_crop_benign_geographic_to_mercator(self):
"""Regression for #2582.

Reprojecting a small EPSG:4326 bbox to EPSG:3857 under
bounds_policy='auto' must match bounds_policy='raw' to a
small tolerance. The old span-ratio heuristic compared
degrees to metres and always tripped on geographic-to-
projected pairs, silently trimming tens of km per side.
"""
from xrspatial.reproject import reproject

data = np.random.RandomState(0).rand(64, 64).astype(np.float32)
r = xr.DataArray(
data, dims=['y', 'x'],
coords={'y': np.linspace(10, -10, 64),
'x': np.linspace(-10, 10, 64)},
attrs={'crs': 'EPSG:4326'},
)
import warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', UserWarning)
auto_r = reproject(r, 'EPSG:3857', bounds_policy='auto')
raw_r = reproject(r, 'EPSG:3857', bounds_policy='raw')

# Spans should match within one output pixel of res.
ax = auto_r.coords['x'].values
rx = raw_r.coords['x'].values
ay = auto_r.coords['y'].values
ry = raw_r.coords['y'].values

res_x = (rx.max() - rx.min()) / max(1, len(rx) - 1)
res_y = (ry.max() - ry.min()) / max(1, len(ry) - 1)

# No more than one pixel of crop on each side (was ~106 km / ~70 km).
assert abs((ax.max() - ax.min()) - (rx.max() - rx.min())) < 2 * res_x
assert abs((ay.max() - ay.min()) - (ry.max() - ry.min())) < 2 * res_y

def test_auto_silent_on_benign_geographic_to_mercator(self):
"""No bounds_policy warning fires for a benign 4326->3857 case (#2582)."""
from xrspatial.reproject import reproject

data = np.random.RandomState(0).rand(32, 32).astype(np.float32)
r = xr.DataArray(
data, dims=['y', 'x'],
coords={'y': np.linspace(10, -10, 32),
'x': np.linspace(-10, 10, 32)},
attrs={'crs': 'EPSG:4326'},
)
import warnings
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
reproject(r, 'EPSG:3857', bounds_policy='auto')
matched = [
wi for wi in w
if issubclass(wi.category, UserWarning)
and 'bounds_policy' in str(wi.message)
]
assert not matched, (
f"unexpected bounds_policy warning(s) on benign 4326->3857 "
f"input: {[str(m.message) for m in matched]}"
)

def test_auto_still_trips_polar_stereographic_blowup(self):
"""Pathological 4326->polar-stereo case must still trip auto (#2582).

A global EPSG:4326 raster projected to EPSG:3413 (NSIDC north
polar stereographic) produces finite-but-astronomical
coordinates near the south pole. The new unit-agnostic
heuristic must still catch this and apply the percentile
fallback.
"""
from xrspatial.reproject._grid import _compute_output_grid
from xrspatial.reproject._crs_utils import _resolve_crs

src_crs = _resolve_crs('EPSG:4326')
tgt_crs = _resolve_crs('EPSG:3413')

import warnings
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
grid = _compute_output_grid(
(-180.0, -90.0, 180.0, 90.0), (100, 200),
src_crs, tgt_crs, bounds_policy='auto',
)
# The auto bounds should be reasonable Earth-scale (well under
# 1e10 m), not the 1e23-scale raw projection.
left, bottom, right, top = grid['bounds']
assert abs(left) < 1e10 and abs(right) < 1e10
assert abs(bottom) < 1e10 and abs(top) < 1e10
# And the warning must fire.
matched = [
wi for wi in w
if issubclass(wi.category, UserWarning)
and 'bounds_policy' in str(wi.message)
and ('blow-up' in str(wi.message) or 'percentile' in str(wi.message))
]
assert matched, (
"expected a blow-up/percentile warning under auto for "
"global 4326 -> polar stereographic"
)

@pytest.mark.skipif(not HAS_DASK, reason="dask required")
def test_auto_does_not_crop_benign_geographic_dask(self):
"""Dask-backed input also gets the unit-aware fix (#2582)."""
from xrspatial.reproject import reproject

data = np.random.RandomState(0).rand(64, 64).astype(np.float32)
r = xr.DataArray(
data, dims=['y', 'x'],
coords={'y': np.linspace(10, -10, 64),
'x': np.linspace(-10, 10, 64)},
attrs={'crs': 'EPSG:4326'},
).chunk({'y': 32, 'x': 32})
import warnings
with warnings.catch_warnings():
warnings.simplefilter('ignore', UserWarning)
auto_r = reproject(r, 'EPSG:3857', bounds_policy='auto')
raw_r = reproject(r, 'EPSG:3857', bounds_policy='raw')
ax = auto_r.coords['x'].values
rx = raw_r.coords['x'].values
res_x = (rx.max() - rx.min()) / max(1, len(rx) - 1)
assert abs((ax.max() - ax.min()) - (rx.max() - rx.min())) < 2 * res_x


# ---------------------------------------------------------------------------
# Integer dtype nodata handling (#2185)
Expand Down
Loading