Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 83 additions & 18 deletions xrspatial/polygonize.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,12 +776,20 @@ def _detect_raster_transform(raster: xr.DataArray):
2. ``raster.rio.transform()`` (rioxarray, if installed). Returns an
``affine.Affine``; iterating it yields the rasterio-ordered 6
coefficients ``(a, b, c, d, e, f)``.
3. ``None``.
3. The raster's own x/y coordinate values (the xarray /
xrspatial standard georeferencing convention -- the same one
``get_dataarray_resolution`` / ``calc_res`` read). This is the
common case: a raster loaded with georeferenced coords and a
``crs`` attr but no separate ``transform`` attr would otherwise
emit pixel-space geometries while ``return_type='geopandas'``
attached a CRS, the exact mismatch #2536 set out to prevent.
4. ``None``.

A raster carrying ``attrs['_xrspatial_no_georef']=True`` (the
xrspatial.geotiff "no georeference" marker) is treated as having
no transform even if ``rio.transform()`` is present, because that
marker explicitly opts out of georeferencing.
no transform even if ``rio.transform()`` or georeferenced coords
are present, because that marker explicitly opts out of
georeferencing.
"""
# Honour the xrspatial.geotiff "no georeference" marker if set.
if raster.attrs.get('_xrspatial_no_georef'):
Expand All @@ -805,16 +813,71 @@ def _detect_raster_transform(raster: xr.DataArray):
# _transform_points does not need.
t = tuple(float(v) for v in tuple(rio_transform)[:6])
# rioxarray returns the identity affine when no transform
# is available, which would silently shift outputs by (0,0)
# and look identical to "no transform". Treat the identity
# the same as None so callers get an unambiguous answer.
if t == (1.0, 0.0, 0.0, 0.0, 1.0, 0.0):
return None
return t
# is available (e.g. coords whose dims it does not recognise
# as spatial). Treat the identity as "rio has nothing" and
# fall through to the coords-based detection below rather than
# returning the meaningless identity.
if t != (1.0, 0.0, 0.0, 0.0, 1.0, 0.0):
return t
except Exception:
pass

return None
# Fall back to the raster's own x/y coordinate values. polygonize
# emits pixel-CORNER integer indices (_follow walks grid corners),
# and the attrs['transform'] path above maps corner index -> world,
# so a coords-derived transform must put the origin at the corner of
# the first pixel: origin = first_center - 0.5 * spacing.
return _transform_from_coords(raster)


def _coord_spacing(values):
"""Return the uniform step of a 1-D coordinate array, or None.

Requires at least two points and even spacing (within a small
relative tolerance). Returns None on irregular spacing so the
caller falls back to pixel space rather than emitting a wrong
affine from, say, the first two coordinates.
"""
if values.ndim != 1 or values.shape[0] < 2:
return None
diffs = np.diff(values.astype(np.float64))
step = diffs[0]
if step == 0:
return None
# numpy.isclose-style tolerance against the first step.
if not np.all(np.abs(diffs - step) <= (1e-8 + 1e-5 * abs(step))):
return None
return float(step)


def _transform_from_coords(raster: xr.DataArray):
"""Derive a rasterio-ordered transform from the raster's x/y coords.

Uses the raster's actual x/y dim names (``dims[-1]`` for x,
``dims[-2]`` for y), matching ``get_dataarray_resolution``. The
coords must be 1-D, length >= 2 and evenly spaced; otherwise this
returns None.
"""
if raster.ndim < 2:
return None
ydim = raster.dims[-2]
xdim = raster.dims[-1]
if xdim not in raster.coords or ydim not in raster.coords:
return None

xc = np.asarray(raster.coords[xdim].values)
yc = np.asarray(raster.coords[ydim].values)

dx = _coord_spacing(xc)
dy = _coord_spacing(yc)
if dx is None or dy is None:
return None

# Coords are pixel centres; the transform maps pixel-corner index 0
# to the corner of the first cell.
origin_x = float(xc[0]) - 0.5 * dx
origin_y = float(yc[0]) - 0.5 * dy
return (dx, 0.0, origin_x, 0.0, dy, origin_y)


def _to_geopandas(
Expand Down Expand Up @@ -2230,14 +2293,16 @@ def polygonize(
When ``transform`` is not supplied explicitly, the raster's affine
transform is auto-detected in this order: ``raster.attrs['transform']``
(xrspatial.geotiff convention, a rasterio-ordered 6-tuple), then
``raster.rio.transform()`` (if rioxarray is installed). An explicit
``transform=`` argument always overrides the auto-detected value.
Auto-detection is skipped when the raster carries
``attrs['_xrspatial_no_georef']=True``. This applies to all return
types -- the geometries themselves are transformed, so the
coordinates emitted in the "numpy", "awkward", "spatialpandas" and
"geojson" outputs are also in CRS coordinate space, not pixel
space, when the raster carries a transform.
``raster.rio.transform()`` (if rioxarray is installed), then the
raster's own x/y coordinate values (the xarray / xrspatial standard
georeferencing convention; used when the coords are 1-D, length >= 2
and evenly spaced). An explicit ``transform=`` argument always
overrides the auto-detected value. Auto-detection is skipped when
the raster carries ``attrs['_xrspatial_no_georef']=True``. This
applies to all return types -- the geometries themselves are
transformed, so the coordinates emitted in the "numpy", "awkward",
"spatialpandas" and "geojson" outputs are also in CRS coordinate
space, not pixel space, when the raster carries a transform.
"""
_validate_raster(raster, func_name='polygonize', name='raster', ndim=2)
if raster.shape[0] < 1 or raster.shape[1] < 1:
Expand Down
178 changes: 176 additions & 2 deletions xrspatial/tests/test_polygonize.py
Original file line number Diff line number Diff line change
Expand Up @@ -1456,8 +1456,16 @@ def test_explicit_transform_overrides_attr(self):
assert all_ys.max() <= 3.0 + 1e-6

def test_no_transform_info_yields_pixel_coords(self):
"""A raster with no transform info keeps pixel-space coords."""
raster = self._raster() # no attrs
"""A raster with no transform info keeps pixel-space coords.

Build a coordless raster here rather than using ``_raster()``:
that helper sets integer-index coords (``np.arange(3)``), and
since #2607 those coords are themselves a transform source
(resolving to a -0.5 pixel-corner origin), so they would no
longer exercise the "genuinely no transform info" path.
"""
data = np.array([[1, 1, 2], [1, 1, 2], [2, 2, 2]], dtype=np.int32)
raster = xr.DataArray(data, dims=('y', 'x')) # no coords, no attrs
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
all_ys = np.concatenate([p[0][:, 1] for p in polys])
Expand Down Expand Up @@ -1619,6 +1627,172 @@ def test_rio_transform_auto_detected(self):
assert all_xs.min() >= 1_000_000.0 - 1e-6


# --- coords-based transform auto-detection tests (issue #2607) ---
#
# polygonize auto-detected attrs['transform'] and rio.transform() but not
# the raster's own x/y coords -- the xarray/xrspatial standard
# georeferencing channel. A coords-georeferenced raster with a crs attr
# produced a GeoDataFrame whose .crs lied about pixel-space geometries,
# the same misalignment #2536 fixed for the transform attr.

class TestPolygonizeCoordsTransform:
"""polygonize derives a transform from x/y coords when no other
transform source is available (#2607)."""

# 10 m pixels, centres so the implied corner origin is (1e6, 5e6).
@staticmethod
def _coords():
xs = 1_000_000.0 + 10.0 * np.arange(3) + 5.0 # centres
ys = 5_000_000.0 - 10.0 * np.arange(3) - 5.0
return ys, xs

@staticmethod
def _data():
return np.array([[1, 1, 2], [1, 1, 2], [2, 2, 2]], dtype=np.int32)

def _raster(self, data=None, **attrs):
ys, xs = self._coords()
if data is None:
data = self._data()
return xr.DataArray(
data, dims=('y', 'x'),
coords={'y': ys, 'x': xs}, attrs=attrs)

def test_coords_transform_numpy(self):
"""Coords-derived transform lands geometries in CRS space."""
raster = self._raster()
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
all_ys = np.concatenate([p[0][:, 1] for p in polys])
# Corner extent: centres span 1e6..1e6+20, so corners 1e6..1e6+30.
assert all_xs.min() >= 1_000_000.0 - 1e-6
assert all_xs.max() <= 1_000_000.0 + 30.0 + 1e-6
assert all_ys.max() <= 5_000_000.0 + 1e-6
assert all_ys.min() >= 5_000_000.0 - 30.0 - 1e-6

@pytest.mark.skipif(gpd is None, reason="geopandas not installed")
def test_coords_transform_geopandas_crs_matches_geometry(self):
"""The geopandas .crs no longer lies about pixel-space geometry."""
raster = self._raster(crs='EPSG:3857')
df = polygonize(raster, return_type='geopandas')
assert df.crs is not None and df.crs.to_epsg() == 3857
bounds = df.geometry.total_bounds # [minx, miny, maxx, maxy]
assert bounds[0] >= 1_000_000.0 - 1e-6
assert bounds[2] <= 1_000_000.0 + 30.0 + 1e-6
assert bounds[1] >= 5_000_000.0 - 30.0 - 1e-6
assert bounds[3] <= 5_000_000.0 + 1e-6

def test_transform_attr_beats_coords(self):
"""attrs['transform'] takes precedence over coords."""
# Coords imply origin 1e6; the attr puts origin at 0.
identity10 = (10.0, 0.0, 0.0, 0.0, 10.0, 0.0)
raster = self._raster(transform=identity10)
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
# Origin 0, 10 m pixels, 3 cols -> max 30, not ~1e6.
assert all_xs.max() <= 30.0 + 1e-6

def test_explicit_transform_beats_coords(self):
"""An explicit transform= overrides coords-derived transform."""
raster = self._raster()
identity = np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0])
_, polys = polygonize(
raster, transform=identity, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.max() <= 3.0 + 1e-6

def test_irregular_coords_yield_no_coords_transform(self):
"""The coords fallback bails on unevenly spaced coords.

``_transform_from_coords`` is the no-rioxarray fallback (step 3 in
``_detect_raster_transform``). When rioxarray is installed it owns
step 2 and will average irregular spacing into a transform, so this
guards the lower-level helper directly rather than the full pipeline.
"""
from ..polygonize import _transform_from_coords
ys, _ = self._coords()
raster = xr.DataArray(
self._data(), dims=('y', 'x'),
coords={'y': ys, 'x': [0.0, 1.0, 5.0]}) # irregular x
assert _transform_from_coords(raster) is None

def test_single_pixel_coords_fall_back(self):
"""A length-1 coord dim has no spacing; fall back to pixel space."""
raster = xr.DataArray(
np.array([[7]], dtype=np.int32), dims=('y', 'x'),
coords={'y': [5_000_000.0], 'x': [1_000_000.0]})
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.max() <= 2.0 + 1e-6 # nx padded to 2 internally

def test_no_coords_yields_pixel_space(self):
"""No coords at all -> pixel-space output (unchanged behaviour)."""
raster = xr.DataArray(self._data(), dims=('y', 'x'))
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.max() <= 3.0 + 1e-6

def test_no_georef_marker_suppresses_coords(self):
"""_xrspatial_no_georef opts out of coords auto-detection too."""
raster = self._raster(_xrspatial_no_georef=True)
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.max() <= 3.0 + 1e-6

def test_coords_transform_with_non_yx_dim_names(self):
"""Coords detection uses the raster's actual dim names, not 'y'/'x'.

get_dataarray_resolution treats dims[-1] as x and dims[-2] as y,
so a raster dimensioned ('lat', 'lon') must still be georeferenced
from its coords.
"""
ys, xs = self._coords()
raster = xr.DataArray(
self._data(), dims=('lat', 'lon'),
coords={'lat': ys, 'lon': xs})
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.min() >= 1_000_000.0 - 1e-6

@pytest.mark.skipif(da is None, reason="dask not installed")
def test_coords_transform_dask(self):
"""Dask backend honours the coords-derived transform."""
ys, xs = self._coords()
raster = xr.DataArray(
da.from_array(self._data(), chunks=(2, 2)),
dims=('y', 'x'), coords={'y': ys, 'x': xs})
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.min() >= 1_000_000.0 - 1e-6
assert all_xs.max() <= 1_000_000.0 + 30.0 + 1e-6

@cuda_and_cupy_available
def test_coords_transform_cupy(self):
"""CuPy backend honours the coords-derived transform."""
import cupy as cp
ys, xs = self._coords()
raster = xr.DataArray(
cp.asarray(self._data()),
dims=('y', 'x'), coords={'y': ys, 'x': xs})
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.min() >= 1_000_000.0 - 1e-6
assert all_xs.max() <= 1_000_000.0 + 30.0 + 1e-6

@cuda_and_cupy_available
def test_coords_transform_dask_cupy(self):
"""Dask+CuPy backend honours the coords-derived transform."""
import cupy as cp
ys, xs = self._coords()
raster = xr.DataArray(
da.from_array(cp.asarray(self._data()), chunks=(2, 2)),
dims=('y', 'x'), coords={'y': ys, 'x': xs})
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.min() >= 1_000_000.0 - 1e-6
assert all_xs.max() <= 1_000_000.0 + 30.0 + 1e-6


# --- atol / rtol tolerance parameter tests (issue #2173) ---
#
# The float pathway in _calculate_regions groups adjacent pixels using an
Expand Down
Loading