diff --git a/.claude/sweep-metadata-state.csv b/.claude/sweep-metadata-state.csv index 0e055d74e..75567ffa5 100644 --- a/.claude/sweep-metadata-state.csv +++ b/.claude/sweep-metadata-state.csv @@ -10,4 +10,5 @@ rasterize,2026-06-09,3087,MEDIUM,1,GeoDataFrame .crs dropped on no-like path (Ca reproject,2026-06-09,3093,MEDIUM,4;5,"Audited 2026-06-09 (agent-a2f2f5befa9759e9e worktree, branch deep-sweep-metadata-reproject-2026-06-09). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live end-to-end for reproject() and merge(). Cat 1 attrs (crs/nodata/res/transform/_FillValue/nodatavals/units refreshed or carried), Cat 2 coords (pixel-center, non-spatial + band coord carry, float64), Cat 3 dims (lat/lon names, band-first round-trip via #2182), and dtype round-trip identical across the 4 backends; vertical_crs/vertical_datum EPSG convention verified; merge attrs from first raster with crs/nodata/res/transform re-emit verified on numpy + dask. NEW MEDIUM finding #3093 (Cat 4 + Cat 5): _reproject_streaming (the reproject() fallback when dask is absent and the in-memory source exceeds 512 MB; dask is an optional dep so reachable on plain pip installs) allocated its assembled output as np.full(out_shape, nodata, dtype=np.float64) in both the local ThreadPoolExecutor and dask.bag distributed branches, so integer sources returned float64 while numpy/cupy/dask+numpy/dask+cupy all round-trip integer dtypes (#2505); it also allocated 2-D so 3-D (y,x,band) sources crashed with a broadcast ValueError. The helper had zero test coverage. Fix in PR #3111 (branch deep-sweep-metadata-reproject-2026-06-09-01): same integer-round-trip dtype rule as _reproject_dask and a (*out_shape, n_bands) allocation for 3-D, in both branches; new TestStreamingDtypeParity (6 tests incl. a LocalCluster run of the distributed branch and value parity vs the in-memory path); full reproject suite 450 passed. LOW (documented, not fixed): geoid_height() docstring says 'Returns N : same type as input' with xr.DataArray listed as accepted, but DataArray inputs return a plain ndarray (coords/dims/attrs dropped) via out.reshape(np.shape(lon)) in _vertical.py." resample,2026-05-27,2542,MEDIUM,2;4;5,"Audited 2026-05-27 (agent-a8135a6a246ecb93c worktree, branch deep-sweep-metadata-resample-2026-05-27). Cat 2 MEDIUM + Cat 4 MEDIUM + Cat 5 MEDIUM all rolled into issue #2542. (a) 2D non-identity path dropped scalar non-dim coords like rioxarrays spatial_ref and squeezed time/band selectors; identity path (scale==1.0, agg.copy()) and 3D path (per-band xr.concat) preserved them, so the bug was path-inconsistent (Cat 5). (b) _resolve_nodata reads attrs[nodata] as a fallback sentinel but the output post-processing only refreshed _FillValue and nodatavals, leaving attrs[nodata]=-9999 alongside data that was now NaN. Fix in resample(): refresh attrs[nodata] to NaN whenever the input had it, and carry across zero-dim non-dim coords on the 2D non-identity path. 7 new tests in TestMetadataPropagation cover nodata-attr refresh, spatial_ref/scalar coord carry, identity-vs-downsample coord parity, and the explicit choice to drop spatially-shaped extra coords. 4-backend (numpy/cupy/dask+numpy/dask+cupy) parity verified for spatial_ref carry; nodata-attr refresh verified on numpy/cupy/dask+numpy (dask+cupy non-NaN nodata masking hits a pre-existing xarray xr.where + cupy.astype quirk unrelated to this audit). Full resample test suite (175 passed) clean." viewshed,2026-05-29,2743,MEDIUM,4;5,output .name differed across backends (None/viewshed/dask-token) and dtype float32 on GPU vs float64 on CPU; added name= param and forced float64 on all backends; attrs/coords/dims already preserved +visibility,2026-06-10,3193,HIGH,5,"cupy backend crash in cumulative_viewshed/visibility_frequency (count np vs cupy add) -> no result/metadata emitted; fixed by cupy count branch + cupy tests. numpy/dask preserve coords/dims/attrs incl crs; visibility_frequency keeps attrs through astype/divide. line_of_sight Dataset drops crs/transform (LOW, transect not raster, documented only)." zonal,2026-05-29,2611,MEDIUM,5,"Audited 2026-05-29 (agent-ae8d8b65cc3a5c40a worktree, branch deep-sweep-metadata-zonal-2026-05-29). CUDA available; all 4 backends (numpy/cupy/dask+numpy/dask+cupy) run live. 5 DataArray-returning functions checked end-to-end: apply, regions, hypsometric_integral, trim, crop. attrs (res/crs/transform/nodatavals), dims, and coords preserved correctly on all 4 backends for every function; trim/crop slice coords with no half-pixel drift. stats() and crosstab() return DataFrames by design so Cat 1-3 DataArray checks N/A. NEW MEDIUM finding #2611 (Cat 5): apply() never set output .name, so numpy/cupy returned None while dask+numpy/dask+cupy inherited a non-deterministic internal dask task name (e.g. _chunk_fn-). regions/hypsometric_integral/trim/crop all set deterministic names; apply was the outlier. Fix in PR #2611/#2622: add name param (default None) and assign result.name after DataArray construction (setting name= at construction does not override the dask graph name). New parametrized test test_apply_name_consistent_across_backends covers default-None and explicit-name on all 4 backends. Full zonal suite 213 passed. No other CRITICAL/HIGH/MEDIUM findings; no LOW findings to document." diff --git a/xrspatial/tests/test_visibility.py b/xrspatial/tests/test_visibility.py index 739b7d2d0..aa0f09332 100644 --- a/xrspatial/tests/test_visibility.py +++ b/xrspatial/tests/test_visibility.py @@ -298,3 +298,67 @@ def test_default_output_name(self): observers = [{'x': 2.0, 'y': 2.0, 'observer_elev': 10}] result = visibility_frequency(raster, observers) assert result.name == 'visibility_frequency' + + +from xrspatial import viewshed +from xrspatial.gpu_rtx import has_rtx +from xrspatial.utils import has_cuda_and_cupy +from xrspatial.viewshed import INVISIBLE + +cupy_skip = pytest.mark.skipif( + not (has_cuda_and_cupy() and has_rtx()), + reason="cupy / rtxpy not available", +) + + +@cupy_skip +class TestCupyBackend: + """cupy backend must return a cupy-backed DataArray with the same + coords, dims, and attrs as the numpy backend (issue #3193).""" + + def _cupy_raster(self): + import cupy as cp + data = np.random.RandomState(1).rand(20, 20).astype(float) * 100 + raster = _make_raster(data) + raster.attrs['crs'] = 'EPSG:4326' + raster.data = cp.asarray(raster.data) + return raster + + def test_cumulative_returns_cupy_with_metadata(self): + import cupy as cp + raster = self._cupy_raster() + observers = [ + {'x': 5.0, 'y': 5.0, 'observer_elev': 50}, + {'x': 12.0, 'y': 12.0, 'observer_elev': 50}, + ] + result = cumulative_viewshed(raster, observers) + assert isinstance(result.data, cp.ndarray) + assert result.dtype == np.int32 + assert result.dims == raster.dims + np.testing.assert_array_equal(result.coords['x'].values, + raster.coords['x'].values) + np.testing.assert_array_equal(result.coords['y'].values, + raster.coords['y'].values) + assert result.attrs.get('crs') == 'EPSG:4326' + + def test_cumulative_matches_single_viewshed(self): + import cupy as cp + raster = self._cupy_raster() + obs = {'x': 5.0, 'y': 5.0, 'observer_elev': 50} + result = cumulative_viewshed(raster, [obs]) + vs = viewshed(raster, x=5.0, y=5.0, observer_elev=50) + expected = (cp.asnumpy(vs.data) != INVISIBLE).astype(np.int32) + np.testing.assert_array_equal(cp.asnumpy(result.data), expected) + + def test_frequency_returns_cupy_with_metadata(self): + import cupy as cp + raster = self._cupy_raster() + observers = [ + {'x': 5.0, 'y': 5.0, 'observer_elev': 50}, + {'x': 12.0, 'y': 12.0, 'observer_elev': 50}, + ] + result = visibility_frequency(raster, observers) + assert isinstance(result.data, cp.ndarray) + assert result.dtype == np.float64 + assert result.dims == raster.dims + assert result.attrs.get('crs') == 'EPSG:4326' diff --git a/xrspatial/visibility.py b/xrspatial/visibility.py index 61c36feb1..72789cbd3 100644 --- a/xrspatial/visibility.py +++ b/xrspatial/visibility.py @@ -268,8 +268,17 @@ def cumulative_viewshed( import dask.array as da _is_dask = isinstance(raster.data, da.Array) + # Detect cupy backend so the accumulator stays on-device and matches + # the array type that viewshed() returns for each observer. + _is_cupy = has_cuda_and_cupy() and is_cupy_array(raster.data) + if _is_dask: + # Dask is checked first, so a dask-of-cupy raster takes this branch + # and never reaches the cupy branch below. count = da.zeros(raster.shape, dtype=np.int32, chunks=raster.data.chunks) + elif _is_cupy: + import cupy as cp + count = cp.zeros(raster.shape, dtype=np.int32) else: count = np.zeros(raster.shape, dtype=np.int32)