diff --git a/xrspatial/polygonize.py b/xrspatial/polygonize.py index d26bb3a9e..ee5fab45a 100644 --- a/xrspatial/polygonize.py +++ b/xrspatial/polygonize.py @@ -776,12 +776,20 @@ def _detect_raster_transform(raster: xr.DataArray): 2. ``raster.rio.transform()`` (rioxarray, if installed). Returns an ``affine.Affine``; iterating it yields the rasterio-ordered 6 coefficients ``(a, b, c, d, e, f)``. - 3. ``None``. + 3. The raster's own x/y coordinate values (the xarray / + xrspatial standard georeferencing convention -- the same one + ``get_dataarray_resolution`` / ``calc_res`` read). This is the + common case: a raster loaded with georeferenced coords and a + ``crs`` attr but no separate ``transform`` attr would otherwise + emit pixel-space geometries while ``return_type='geopandas'`` + attached a CRS, the exact mismatch #2536 set out to prevent. + 4. ``None``. A raster carrying ``attrs['_xrspatial_no_georef']=True`` (the xrspatial.geotiff "no georeference" marker) is treated as having - no transform even if ``rio.transform()`` is present, because that - marker explicitly opts out of georeferencing. + no transform even if ``rio.transform()`` or georeferenced coords + are present, because that marker explicitly opts out of + georeferencing. """ # Honour the xrspatial.geotiff "no georeference" marker if set. if raster.attrs.get('_xrspatial_no_georef'): @@ -805,16 +813,71 @@ def _detect_raster_transform(raster: xr.DataArray): # _transform_points does not need. t = tuple(float(v) for v in tuple(rio_transform)[:6]) # rioxarray returns the identity affine when no transform - # is available, which would silently shift outputs by (0,0) - # and look identical to "no transform". Treat the identity - # the same as None so callers get an unambiguous answer. - if t == (1.0, 0.0, 0.0, 0.0, 1.0, 0.0): - return None - return t + # is available (e.g. coords whose dims it does not recognise + # as spatial). Treat the identity as "rio has nothing" and + # fall through to the coords-based detection below rather than + # returning the meaningless identity. + if t != (1.0, 0.0, 0.0, 0.0, 1.0, 0.0): + return t except Exception: pass - return None + # Fall back to the raster's own x/y coordinate values. polygonize + # emits pixel-CORNER integer indices (_follow walks grid corners), + # and the attrs['transform'] path above maps corner index -> world, + # so a coords-derived transform must put the origin at the corner of + # the first pixel: origin = first_center - 0.5 * spacing. + return _transform_from_coords(raster) + + +def _coord_spacing(values): + """Return the uniform step of a 1-D coordinate array, or None. + + Requires at least two points and even spacing (within a small + relative tolerance). Returns None on irregular spacing so the + caller falls back to pixel space rather than emitting a wrong + affine from, say, the first two coordinates. + """ + if values.ndim != 1 or values.shape[0] < 2: + return None + diffs = np.diff(values.astype(np.float64)) + step = diffs[0] + if step == 0: + return None + # numpy.isclose-style tolerance against the first step. + if not np.all(np.abs(diffs - step) <= (1e-8 + 1e-5 * abs(step))): + return None + return float(step) + + +def _transform_from_coords(raster: xr.DataArray): + """Derive a rasterio-ordered transform from the raster's x/y coords. + + Uses the raster's actual x/y dim names (``dims[-1]`` for x, + ``dims[-2]`` for y), matching ``get_dataarray_resolution``. The + coords must be 1-D, length >= 2 and evenly spaced; otherwise this + returns None. + """ + if raster.ndim < 2: + return None + ydim = raster.dims[-2] + xdim = raster.dims[-1] + if xdim not in raster.coords or ydim not in raster.coords: + return None + + xc = np.asarray(raster.coords[xdim].values) + yc = np.asarray(raster.coords[ydim].values) + + dx = _coord_spacing(xc) + dy = _coord_spacing(yc) + if dx is None or dy is None: + return None + + # Coords are pixel centres; the transform maps pixel-corner index 0 + # to the corner of the first cell. + origin_x = float(xc[0]) - 0.5 * dx + origin_y = float(yc[0]) - 0.5 * dy + return (dx, 0.0, origin_x, 0.0, dy, origin_y) def _to_geopandas( @@ -2230,14 +2293,16 @@ def polygonize( When ``transform`` is not supplied explicitly, the raster's affine transform is auto-detected in this order: ``raster.attrs['transform']`` (xrspatial.geotiff convention, a rasterio-ordered 6-tuple), then - ``raster.rio.transform()`` (if rioxarray is installed). An explicit - ``transform=`` argument always overrides the auto-detected value. - Auto-detection is skipped when the raster carries - ``attrs['_xrspatial_no_georef']=True``. This applies to all return - types -- the geometries themselves are transformed, so the - coordinates emitted in the "numpy", "awkward", "spatialpandas" and - "geojson" outputs are also in CRS coordinate space, not pixel - space, when the raster carries a transform. + ``raster.rio.transform()`` (if rioxarray is installed), then the + raster's own x/y coordinate values (the xarray / xrspatial standard + georeferencing convention; used when the coords are 1-D, length >= 2 + and evenly spaced). An explicit ``transform=`` argument always + overrides the auto-detected value. Auto-detection is skipped when + the raster carries ``attrs['_xrspatial_no_georef']=True``. This + applies to all return types -- the geometries themselves are + transformed, so the coordinates emitted in the "numpy", "awkward", + "spatialpandas" and "geojson" outputs are also in CRS coordinate + space, not pixel space, when the raster carries a transform. """ _validate_raster(raster, func_name='polygonize', name='raster', ndim=2) if raster.shape[0] < 1 or raster.shape[1] < 1: diff --git a/xrspatial/tests/test_polygonize.py b/xrspatial/tests/test_polygonize.py index c9c7328da..74f4a8af4 100644 --- a/xrspatial/tests/test_polygonize.py +++ b/xrspatial/tests/test_polygonize.py @@ -1456,8 +1456,16 @@ def test_explicit_transform_overrides_attr(self): assert all_ys.max() <= 3.0 + 1e-6 def test_no_transform_info_yields_pixel_coords(self): - """A raster with no transform info keeps pixel-space coords.""" - raster = self._raster() # no attrs + """A raster with no transform info keeps pixel-space coords. + + Build a coordless raster here rather than using ``_raster()``: + that helper sets integer-index coords (``np.arange(3)``), and + since #2607 those coords are themselves a transform source + (resolving to a -0.5 pixel-corner origin), so they would no + longer exercise the "genuinely no transform info" path. + """ + data = np.array([[1, 1, 2], [1, 1, 2], [2, 2, 2]], dtype=np.int32) + raster = xr.DataArray(data, dims=('y', 'x')) # no coords, no attrs _, polys = polygonize(raster, return_type='numpy') all_xs = np.concatenate([p[0][:, 0] for p in polys]) all_ys = np.concatenate([p[0][:, 1] for p in polys]) @@ -1619,6 +1627,172 @@ def test_rio_transform_auto_detected(self): assert all_xs.min() >= 1_000_000.0 - 1e-6 +# --- coords-based transform auto-detection tests (issue #2607) --- +# +# polygonize auto-detected attrs['transform'] and rio.transform() but not +# the raster's own x/y coords -- the xarray/xrspatial standard +# georeferencing channel. A coords-georeferenced raster with a crs attr +# produced a GeoDataFrame whose .crs lied about pixel-space geometries, +# the same misalignment #2536 fixed for the transform attr. + +class TestPolygonizeCoordsTransform: + """polygonize derives a transform from x/y coords when no other + transform source is available (#2607).""" + + # 10 m pixels, centres so the implied corner origin is (1e6, 5e6). + @staticmethod + def _coords(): + xs = 1_000_000.0 + 10.0 * np.arange(3) + 5.0 # centres + ys = 5_000_000.0 - 10.0 * np.arange(3) - 5.0 + return ys, xs + + @staticmethod + def _data(): + return np.array([[1, 1, 2], [1, 1, 2], [2, 2, 2]], dtype=np.int32) + + def _raster(self, data=None, **attrs): + ys, xs = self._coords() + if data is None: + data = self._data() + return xr.DataArray( + data, dims=('y', 'x'), + coords={'y': ys, 'x': xs}, attrs=attrs) + + def test_coords_transform_numpy(self): + """Coords-derived transform lands geometries in CRS space.""" + raster = self._raster() + _, polys = polygonize(raster, return_type='numpy') + all_xs = np.concatenate([p[0][:, 0] for p in polys]) + all_ys = np.concatenate([p[0][:, 1] for p in polys]) + # Corner extent: centres span 1e6..1e6+20, so corners 1e6..1e6+30. + assert all_xs.min() >= 1_000_000.0 - 1e-6 + assert all_xs.max() <= 1_000_000.0 + 30.0 + 1e-6 + assert all_ys.max() <= 5_000_000.0 + 1e-6 + assert all_ys.min() >= 5_000_000.0 - 30.0 - 1e-6 + + @pytest.mark.skipif(gpd is None, reason="geopandas not installed") + def test_coords_transform_geopandas_crs_matches_geometry(self): + """The geopandas .crs no longer lies about pixel-space geometry.""" + raster = self._raster(crs='EPSG:3857') + df = polygonize(raster, return_type='geopandas') + assert df.crs is not None and df.crs.to_epsg() == 3857 + bounds = df.geometry.total_bounds # [minx, miny, maxx, maxy] + assert bounds[0] >= 1_000_000.0 - 1e-6 + assert bounds[2] <= 1_000_000.0 + 30.0 + 1e-6 + assert bounds[1] >= 5_000_000.0 - 30.0 - 1e-6 + assert bounds[3] <= 5_000_000.0 + 1e-6 + + def test_transform_attr_beats_coords(self): + """attrs['transform'] takes precedence over coords.""" + # Coords imply origin 1e6; the attr puts origin at 0. + identity10 = (10.0, 0.0, 0.0, 0.0, 10.0, 0.0) + raster = self._raster(transform=identity10) + _, polys = polygonize(raster, return_type='numpy') + all_xs = np.concatenate([p[0][:, 0] for p in polys]) + # Origin 0, 10 m pixels, 3 cols -> max 30, not ~1e6. + assert all_xs.max() <= 30.0 + 1e-6 + + def test_explicit_transform_beats_coords(self): + """An explicit transform= overrides coords-derived transform.""" + raster = self._raster() + identity = np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0]) + _, polys = polygonize( + raster, transform=identity, return_type='numpy') + all_xs = np.concatenate([p[0][:, 0] for p in polys]) + assert all_xs.max() <= 3.0 + 1e-6 + + def test_irregular_coords_yield_no_coords_transform(self): + """The coords fallback bails on unevenly spaced coords. + + ``_transform_from_coords`` is the no-rioxarray fallback (step 3 in + ``_detect_raster_transform``). When rioxarray is installed it owns + step 2 and will average irregular spacing into a transform, so this + guards the lower-level helper directly rather than the full pipeline. + """ + from ..polygonize import _transform_from_coords + ys, _ = self._coords() + raster = xr.DataArray( + self._data(), dims=('y', 'x'), + coords={'y': ys, 'x': [0.0, 1.0, 5.0]}) # irregular x + assert _transform_from_coords(raster) is None + + def test_single_pixel_coords_fall_back(self): + """A length-1 coord dim has no spacing; fall back to pixel space.""" + raster = xr.DataArray( + np.array([[7]], dtype=np.int32), dims=('y', 'x'), + coords={'y': [5_000_000.0], 'x': [1_000_000.0]}) + _, polys = polygonize(raster, return_type='numpy') + all_xs = np.concatenate([p[0][:, 0] for p in polys]) + assert all_xs.max() <= 2.0 + 1e-6 # nx padded to 2 internally + + def test_no_coords_yields_pixel_space(self): + """No coords at all -> pixel-space output (unchanged behaviour).""" + raster = xr.DataArray(self._data(), dims=('y', 'x')) + _, polys = polygonize(raster, return_type='numpy') + all_xs = np.concatenate([p[0][:, 0] for p in polys]) + assert all_xs.max() <= 3.0 + 1e-6 + + def test_no_georef_marker_suppresses_coords(self): + """_xrspatial_no_georef opts out of coords auto-detection too.""" + raster = self._raster(_xrspatial_no_georef=True) + _, polys = polygonize(raster, return_type='numpy') + all_xs = np.concatenate([p[0][:, 0] for p in polys]) + assert all_xs.max() <= 3.0 + 1e-6 + + def test_coords_transform_with_non_yx_dim_names(self): + """Coords detection uses the raster's actual dim names, not 'y'/'x'. + + get_dataarray_resolution treats dims[-1] as x and dims[-2] as y, + so a raster dimensioned ('lat', 'lon') must still be georeferenced + from its coords. + """ + ys, xs = self._coords() + raster = xr.DataArray( + self._data(), dims=('lat', 'lon'), + coords={'lat': ys, 'lon': xs}) + _, polys = polygonize(raster, return_type='numpy') + all_xs = np.concatenate([p[0][:, 0] for p in polys]) + assert all_xs.min() >= 1_000_000.0 - 1e-6 + + @pytest.mark.skipif(da is None, reason="dask not installed") + def test_coords_transform_dask(self): + """Dask backend honours the coords-derived transform.""" + ys, xs = self._coords() + raster = xr.DataArray( + da.from_array(self._data(), chunks=(2, 2)), + dims=('y', 'x'), coords={'y': ys, 'x': xs}) + _, polys = polygonize(raster, return_type='numpy') + all_xs = np.concatenate([p[0][:, 0] for p in polys]) + assert all_xs.min() >= 1_000_000.0 - 1e-6 + assert all_xs.max() <= 1_000_000.0 + 30.0 + 1e-6 + + @cuda_and_cupy_available + def test_coords_transform_cupy(self): + """CuPy backend honours the coords-derived transform.""" + import cupy as cp + ys, xs = self._coords() + raster = xr.DataArray( + cp.asarray(self._data()), + dims=('y', 'x'), coords={'y': ys, 'x': xs}) + _, polys = polygonize(raster, return_type='numpy') + all_xs = np.concatenate([p[0][:, 0] for p in polys]) + assert all_xs.min() >= 1_000_000.0 - 1e-6 + assert all_xs.max() <= 1_000_000.0 + 30.0 + 1e-6 + + @cuda_and_cupy_available + def test_coords_transform_dask_cupy(self): + """Dask+CuPy backend honours the coords-derived transform.""" + import cupy as cp + ys, xs = self._coords() + raster = xr.DataArray( + da.from_array(cp.asarray(self._data()), chunks=(2, 2)), + dims=('y', 'x'), coords={'y': ys, 'x': xs}) + _, polys = polygonize(raster, return_type='numpy') + all_xs = np.concatenate([p[0][:, 0] for p in polys]) + assert all_xs.min() >= 1_000_000.0 - 1e-6 + assert all_xs.max() <= 1_000_000.0 + 30.0 + 1e-6 + + # --- atol / rtol tolerance parameter tests (issue #2173) --- # # The float pathway in _calculate_regions groups adjacent pixels using an