From 790c11298d089f41bbe757e08bee0d00caf2ddac Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 9 Jun 2026 16:46:07 -0700 Subject: [PATCH 1/3] Update accuracy sweep state for reproject (#3094, #3096) --- .claude/sweep-accuracy-state.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/sweep-accuracy-state.csv b/.claude/sweep-accuracy-state.csv index 974a9bebd..e13bddce2 100644 --- a/.claude/sweep-accuracy-state.csv +++ b/.claude/sweep-accuracy-state.csv @@ -27,7 +27,7 @@ perlin,2026-04-10T12:00:00Z,,,,Improved Perlin noise implementation correct. Fad polygon_clip,2026-04-13T12:00:00Z,1197,,,crop=True + all_touched=True drops boundary pixels. Fix in PR #1200. polygonize,2026-05-29,2606,HIGH,5,"Cat 5 HIGH: dask connectivity=8 cross-chunk merge filled diagonal notch where same-value regions meet only at a corner across a chunk boundary; total area exceeded raster. Hole ring was dropped because containment tested hole[0] (on exterior at pinch). Fixed via _ring_interior_point in PR for #2606. numpy, dask+numpy, dask+cupy area parity now holds; 4-conn was already correct. cupy + dask+cupy paths validated on GPU host. Other cats clean: NaN masked on numpy/cupy float paths (tested), _is_close handles +/-inf via exact-equality short-circuit, atol/rtol/simplify_tolerance reject NaN/inf, integer GPU CCL matches numpy." proximity,2026-05-29,2721,MEDIUM,4;5,Bounded GREAT_CIRCLE on dask (both numpy+cupy) raised ValueError: map_overlap pad depth = max_distance/cellsize mixed metre distance with degree cellsize. numpy/cupy backends fine. Fixed by measuring per-pixel pitch with active metric (PR #2722). Cat1 float32 output is documented design choice; NaN/Inf masking via np.isfinite consistent; numpy GDAL-sweep matches exact nearest and cupy brute-force on tested grids. -reproject,2026-05-29,2620,HIGH,5,"Cat5 backend inconsistency: cupy _resample_cupy (cupyx map_coordinates) diverged from numpy/native on pyproj-fallback CRS pairs (projected->projected, e.g. EPSG:32633->3857). Edge-band cval=0.0 bleed (all modes, ~534/pixel) + cubic B-spline vs Catmull-Rom (~0.45 interior). Fixed PR for #2620: route eager+dask cupy through _resample_cupy_native. Other files clean: _merge numpy/cupy structurally identical; _datum_grids/_vertical/_itrf use -0.5 pixel-center interp and self-inequality NaN checks; WGS84/GRS80 constants correct; curvature correction n/a (no geodesic gradient here). LOW (not fixed): _transform._bilinear_interp_2ch docstring claims parallel but isn't." +reproject,2026-06-09,3094,HIGH,4;5,"HIGH #3094: try_cuda_transform missing the #2651 non-WGS84 datum guard; cupy/dask+cupy 4326<->27700 (and DHDN/MGI/ED50/NAD27 entries) project with WGS84 Krueger + no datum shift, ~80-100 m coordinate error, end-to-end numpy-vs-cupy value diff 0.094 on [0,1] data with 5 NaN-mask flips (verified on GPU). MEDIUM #3096: empty-chunk returns in _reproject_chunk_numpy/_cupy and _reproject_block_adapter hardcode float64, so integer-source dask reproject computes float64 when any chunk misses the source footprint while meta advertises int (eager backend returns int). LOW (doc only): _place_same_crs accepts 1% res mismatch for direct placement, up to ~1% of tile width misregistration at the far edge on large same-CRS merge tiles. Cats 1-3 clean: f64 kernels, GDAL-style NaN renorm, correct bounds guards; projection series/constants match PROJ. CUDA available; GPU paths executed." resample,2026-05-29,2610,HIGH,3;5,"dask interp (nearest/bilinear) overlap depth=1 too small on downsample; block-centered source coord landed past chunk, map_coordinates clamped to edge -> wrong seam rows. Fixed PR #2627 via per-axis _downsample_radius. cupy+dask+cupy verified." sieve,2026-04-13T12:00:00Z,,,,Union-find CCL correct. NaN excluded from labeling. All backends funnel through _sieve_numpy. sky_view_factor,2026-05-01,1407,HIGH,4,Horizon angle ignored cell size; fixed by passing cellsize_x/cellsize_y into CPU+GPU kernels and using ground distance From 624df4af999802e15882b606125c2a867e518c84 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 9 Jun 2026 16:52:53 -0700 Subject: [PATCH 2/3] Keep integer dtype for empty reproject chunks (#3096) The empty-chunk fills in _reproject_chunk_numpy, _reproject_chunk_cupy, and the footprint-skip path in _reproject_block_adapter were hardcoded to float64. With an integer source, one no-overlap output chunk promoted the whole computed dask array to float64 while the lazy array (and the eager backend) advertised the integer dtype. Fill empty chunks with the source integer dtype instead; the resolved nodata is guaranteed representable for integer rasters (#2185/#2572). Float sources keep float64 fills. --- xrspatial/reproject/__init__.py | 50 ++++++++++++----- xrspatial/tests/test_reproject.py | 91 +++++++++++++++++++++++++++++++ 2 files changed, 128 insertions(+), 13 deletions(-) diff --git a/xrspatial/reproject/__init__.py b/xrspatial/reproject/__init__.py index 94a623b99..ca7bab67e 100644 --- a/xrspatial/reproject/__init__.py +++ b/xrspatial/reproject/__init__.py @@ -411,10 +411,21 @@ def _reproject_chunk_numpy( else: _empty_shape = chunk_shape + # Empty-chunk fills must match the dtype the data path returns and the + # dask template advertises (#3096): integer sources round-trip back to + # their dtype, floats stay float64. Without this, a single no-overlap + # chunk promoted the whole assembled dask output to float64. The + # resolved nodata is guaranteed representable for integer rasters + # (#2185/#2572). + if np.issubdtype(source_data.dtype, np.integer): + _empty_dtype = source_data.dtype + else: + _empty_dtype = np.float64 + if not np.isfinite(r_min) or not np.isfinite(r_max): - return np.full(_empty_shape, nodata, dtype=np.float64) + return np.full(_empty_shape, nodata, dtype=_empty_dtype) if not np.isfinite(c_min) or not np.isfinite(c_max): - return np.full(_empty_shape, nodata, dtype=np.float64) + return np.full(_empty_shape, nodata, dtype=_empty_dtype) r_min = int(np.floor(r_min)) - 2 r_max = int(np.ceil(r_max)) + 3 @@ -423,7 +434,7 @@ def _reproject_chunk_numpy( # Check overlap if r_min >= src_h or r_max <= 0 or c_min >= src_w or c_max <= 0: - return np.full(_empty_shape, nodata, dtype=np.float64) + return np.full(_empty_shape, nodata, dtype=_empty_dtype) # Clip to source bounds r_min_clip = max(0, r_min) @@ -436,7 +447,7 @@ def _reproject_chunk_numpy( _MAX_WINDOW_PIXELS = 64 * 1024 * 1024 # 64 Mpix (~512 MB for float64) win_pixels = (r_max_clip - r_min_clip) * (c_max_clip - c_min_clip) if win_pixels > _MAX_WINDOW_PIXELS: - return np.full(_empty_shape, nodata, dtype=np.float64) + return np.full(_empty_shape, nodata, dtype=_empty_dtype) # Extract source window window = source_data[r_min_clip:r_max_clip, c_min_clip:c_max_clip] @@ -514,6 +525,13 @@ def _reproject_chunk_cupy( else: _empty_shape = chunk_shape + # Empty-chunk fills must match the dtype the data path returns (#3096); + # see the matching block in _reproject_chunk_numpy. + if np.issubdtype(source_data.dtype, np.integer): + _empty_dtype = source_data.dtype + else: + _empty_dtype = np.float64 + # Try CUDA transform first (keeps coordinates on-device). # transform_precision == 0 forces the exact pyproj path, so skip CUDA. cuda_result = None @@ -554,7 +572,7 @@ def _reproject_chunk_cupy( ) if not (np.isfinite(r_min_val) and np.isfinite(r_max_val) and np.isfinite(c_min_val) and np.isfinite(c_max_val)): - return cp.full(_empty_shape, nodata, dtype=cp.float64) + return cp.full(_empty_shape, nodata, dtype=_empty_dtype) r_min = int(np.floor(r_min_val)) - 2 r_max = int(np.ceil(r_max_val)) + 3 c_min = int(np.floor(c_min_val)) - 2 @@ -592,9 +610,9 @@ def _reproject_chunk_cupy( c_min = np.nanmin(src_col_px) c_max = np.nanmax(src_col_px) if not np.isfinite(r_min) or not np.isfinite(r_max): - return cp.full(_empty_shape, nodata, dtype=cp.float64) + return cp.full(_empty_shape, nodata, dtype=_empty_dtype) if not np.isfinite(c_min) or not np.isfinite(c_max): - return cp.full(_empty_shape, nodata, dtype=cp.float64) + return cp.full(_empty_shape, nodata, dtype=_empty_dtype) r_min = int(np.floor(r_min)) - 2 r_max = int(np.ceil(r_max)) + 3 c_min = int(np.floor(c_min)) - 2 @@ -602,7 +620,7 @@ def _reproject_chunk_cupy( _use_native_cuda = False if r_min >= src_h or r_max <= 0 or c_min >= src_w or c_max <= 0: - return cp.full(_empty_shape, nodata, dtype=cp.float64) + return cp.full(_empty_shape, nodata, dtype=_empty_dtype) r_min_clip = max(0, r_min) r_max_clip = min(src_h, r_max) @@ -614,7 +632,7 @@ def _reproject_chunk_cupy( _MAX_WINDOW_PIXELS = 64 * 1024 * 1024 # 64 Mpix (~512 MB for float64) win_pixels = (r_max_clip - r_min_clip) * (c_max_clip - c_min_clip) if win_pixels > _MAX_WINDOW_PIXELS: - return cp.full(_empty_shape, nodata, dtype=cp.float64) + return cp.full(_empty_shape, nodata, dtype=_empty_dtype) window = source_data[r_min_clip:r_max_clip, c_min_clip:c_max_clip] if hasattr(window, 'compute'): @@ -1967,15 +1985,21 @@ def _reproject_block_adapter( is_3d = n_bands is not None - # Skip chunks that don't overlap the source footprint + # Skip chunks that don't overlap the source footprint. The fill dtype + # must match the template: integer sources advertise their own dtype, + # so a float64 fill here would promote the assembled output (#3096). if src_footprint_tgt is not None and not _bounds_overlap(cb, src_footprint_tgt): + if np.issubdtype(source_data.dtype, np.integer): + empty_dtype = source_data.dtype + else: + empty_dtype = np.float64 if is_3d: empty_shape = (*chunk_shape, n_bands) if is_cupy: import cupy as cp - return cp.full(empty_shape, nodata, dtype=cp.float64) - return np.full(empty_shape, nodata, dtype=np.float64) - return np.full(chunk_shape, nodata, dtype=np.float64) + return cp.full(empty_shape, nodata, dtype=empty_dtype) + return np.full(empty_shape, nodata, dtype=empty_dtype) + return np.full(chunk_shape, nodata, dtype=empty_dtype) chunk_fn = _reproject_chunk_cupy if is_cupy else _reproject_chunk_numpy return chunk_fn( diff --git a/xrspatial/tests/test_reproject.py b/xrspatial/tests/test_reproject.py index 11d7a2829..4397ed074 100644 --- a/xrspatial/tests/test_reproject.py +++ b/xrspatial/tests/test_reproject.py @@ -4408,6 +4408,97 @@ def test_dask_cupy_reproject_int16_matches_dask_numpy_dtype(self): assert r_np.dtype == r_cp.dtype == np.int16 +class TestEmptyChunkDtype: + """Empty / no-overlap chunks must keep the integer source dtype (#3096). + + The empty-chunk fills in the chunk workers and the footprint-skip + path in ``_reproject_block_adapter`` were hardcoded to float64. With + an integer source, one no-overlap chunk was enough to promote the + whole computed dask array to float64 while the lazy array (and the + eager backend) advertised the integer dtype. + """ + + # Output bounds in EPSG:3857 that are far larger than the projected + # footprint of the source raster below, so corner chunks have no + # source overlap and take the empty-chunk path. + _WIDE_BOUNDS = (-2_000_000.0, 5_000_000.0, 2_000_000.0, 9_000_000.0) + + def _make_int16_data(self, n=200): + rng = np.random.default_rng(3096) + return (rng.random((n, n)) * 1000).astype(np.int16) + + def _coords(self, n=200): + return {'y': np.linspace(52.0, 51.0, n), 'x': np.linspace(-2.0, -1.0, n)} + + @pytest.mark.skipif(not HAS_DASK, reason="dask required") + def test_dask_int16_empty_chunks_keep_dtype(self): + from xrspatial.reproject import reproject + data = self._make_int16_data() + raster = xr.DataArray( + da.from_array(data, chunks=(64, 64)), + dims=['y', 'x'], coords=self._coords(), + attrs={'crs': 'EPSG:4326', 'nodata': -32768}, + ) + result = reproject(raster, 'EPSG:3857', bounds=self._WIDE_BOUNDS, + resolution=20000, chunk_size=64) + assert result.dtype == np.int16 + computed = result.compute() + assert computed.dtype == np.int16 + # The no-overlap corners must hold the integer sentinel. + assert computed.values[0, 0] == -32768 + # Some chunk must contain real data, or this test exercises + # nothing but empty fills. + assert (computed.values != -32768).any() + + def test_numpy_int16_no_overlap_output_keeps_dtype(self): + # Eager backend, output grid entirely outside the source + # footprint: the single chunk takes the no-overlap early return. + from xrspatial.reproject import reproject + data = self._make_int16_data(32) + raster = xr.DataArray( + data, dims=['y', 'x'], coords=self._coords(32), + attrs={'crs': 'EPSG:4326', 'nodata': -32768}, + ) + result = reproject(raster, 'EPSG:3857', + bounds=(5_000_000.0, 5_000_000.0, + 6_000_000.0, 6_000_000.0), + resolution=20000) + assert result.dtype == np.int16 + assert (result.values == -32768).all() + + @pytest.mark.skipif(not HAS_DASK, reason="dask required") + def test_dask_float_empty_chunks_stay_float64(self): + # Float sources keep returning float64 empty chunks (NaN fill). + from xrspatial.reproject import reproject + data = np.random.RandomState(0).rand(200, 200) + raster = xr.DataArray( + da.from_array(data, chunks=(64, 64)), + dims=['y', 'x'], coords=self._coords(), + attrs={'crs': 'EPSG:4326'}, + ) + result = reproject(raster, 'EPSG:3857', bounds=self._WIDE_BOUNDS, + resolution=20000, chunk_size=64) + assert result.dtype == np.float64 + computed = result.compute() + assert computed.dtype == np.float64 + assert np.isnan(computed.values[0, 0]) + + @pytest.mark.skipif(not (HAS_DASK and HAS_CUPY), + reason="dask + cupy required") + def test_dask_cupy_int16_empty_chunks_keep_dtype(self): + from xrspatial.reproject import reproject + data = self._make_int16_data() + raster = xr.DataArray( + da.from_array(cp.asarray(data), chunks=(64, 64)), + dims=['y', 'x'], coords=self._coords(), + attrs={'crs': 'EPSG:4326', 'nodata': -32768}, + ) + result = reproject(raster, 'EPSG:3857', bounds=self._WIDE_BOUNDS, + resolution=20000, chunk_size=64) + computed = result.compute() + assert computed.dtype == np.int16 + + @pytest.mark.skipif(not HAS_DASK, reason="dask required") class TestMergeDaskParity: """Dask merge should match the eager numpy merge.""" From 6a24085badd9a652705522d05309553a2459b5bd Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 9 Jun 2026 16:56:05 -0700 Subject: [PATCH 3/3] Address review nit: cupy fill for 2-D footprint-skip chunks (#3096) The 2-D footprint-skip path returned a numpy fill on dask+cupy while the 3-D branch and the data chunks return cupy arrays. Unify on cp.full when is_cupy so empty blocks match the rest of the graph. --- xrspatial/reproject/__init__.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/xrspatial/reproject/__init__.py b/xrspatial/reproject/__init__.py index ca7bab67e..a863b4f10 100644 --- a/xrspatial/reproject/__init__.py +++ b/xrspatial/reproject/__init__.py @@ -1993,13 +1993,13 @@ def _reproject_block_adapter( empty_dtype = source_data.dtype else: empty_dtype = np.float64 - if is_3d: - empty_shape = (*chunk_shape, n_bands) - if is_cupy: - import cupy as cp - return cp.full(empty_shape, nodata, dtype=empty_dtype) - return np.full(empty_shape, nodata, dtype=empty_dtype) - return np.full(chunk_shape, nodata, dtype=empty_dtype) + empty_shape = (*chunk_shape, n_bands) if is_3d else chunk_shape + if is_cupy: + # Match the data chunks (and the 3-D branch): dask+cupy + # blocks should be cupy arrays even when empty. + import cupy as cp + return cp.full(empty_shape, nodata, dtype=empty_dtype) + return np.full(empty_shape, nodata, dtype=empty_dtype) chunk_fn = _reproject_chunk_cupy if is_cupy else _reproject_chunk_numpy return chunk_fn(