diff --git a/.claude/sweep-api-consistency-state.csv b/.claude/sweep-api-consistency-state.csv index e07255aa9..d2714de9e 100644 --- a/.claude/sweep-api-consistency-state.csv +++ b/.claude/sweep-api-consistency-state.csv @@ -1,11 +1,11 @@ -module,last_inspected,issue,severity_max,categories_found,notes -focal,2026-05-29,2689,HIGH,1;2;3;4,"Sweep 2026-05-29 (deep-sweep-api-consistency-focal-2026-05-29). Fixed in PR #2699 (issue #2689): (HIGH Cat 1) first-arg drift raster vs agg -- apply()/hotspots() took `raster` while mean()/focal_stats() and the rest of the library (curvature/slope/aspect/hillshade/classify) take `agg`; both names live in the public API at once. Renamed apply/hotspots first arg to `agg` with a keyword-only deprecation shim (raster=None): old keyword still accepted, emits DeprecationWarning, passing both raises TypeError, positional callers untouched. (MEDIUM Cat 1+5) name= param missing on focal_stats/hotspots while mean/apply have one -- added name='focal_stats'/'hotspots'. (MEDIUM Cat 2) focal_stats output .name was inconsistent across backends (numpy leaked internal 'focal_apply', cupy returned None) -- now set consistently on numpy/cupy/dask+numpy/dask+cupy via result.name=name. (MEDIUM Cat 3) mean() docstring omitted the `excludes` param -- documented. (MEDIUM Cat 4) mutable list defaults excludes=[np.nan] and stats_funcs=[...] replaced with None sentinels. Tests: deprecation warnings, both-args TypeError, name= parity across backends incl GPU variants, default-value isolation. Documented but NOT filed per template: (LOW Cat 3) none of the focal public funcs have type hints while sibling curvature does -- library-wide gap, not per-module. (LOW cross-cutting) apply/hotspots default func vs ngjit-vs-cuda.jit constraint for cupy backend is documented in the docstring, not a consistency bug. No Cat 5 orphan API (apply/focal_stats/hotspots consumed via `from xrspatial.focal import ...` and documented in focal.rst autosummary; mean re-exported in __init__). cuda-validated: CUDA_AVAILABLE=True on this host; cupy + dask+cupy entry points smoke-tested for name= and signature parity before opening the PR." -geotiff,2026-06-09,3086,MEDIUM,3,"Sweep 2026-06-09 (deep-sweep-api-consistency-geotiff-2026-06-09). 1 MEDIUM Cat 3 finding fixed in this branch (#3086): after the mask_and_scale -> unpack rename (#3071/PR #3075), user-facing strings still presented the deprecated alias as canonical: to_geotiff's pack docstring said 'Inverse of open_geotiff(mask_and_scale=True)', and errors on the public unpack=True/pack=True paths named the alias (MalformedScaleOffsetError x2 and the MixedBandMetadataError remedy 'or drop mask_and_scale' in _attrs.py; the _pack ValueError 'not produced by open_geotiff(mask_and_scale=True)'; the to_geotiff pack ValueError in _writers/eager.py). Fix updates the docstring + error strings to name unpack. Intentionally unchanged: internal backend kwargs still named mask_and_scale (private modules) and the attrs key mask_and_scale_dtype (contract v5). Regression tests pin that the messages name unpack and not the alias (tests/read/test_rioxarray_compat_2961.py #3086 section; tests/write/test_pack_3064.py). Other categories: no Cat 1 in-module drift (reader/writer share gpu/allow_* names; masked/default_name/unpack follow rioxarray open_rasterio deliberately, with deprecated aliases mask_nodata/name/mask_and_scale that warn); no Cat 2 (open_geotiff -> DataArray, to_geotiff returns path, both documented); no Cat 4 (reader gpu=False vs writer gpu=None auto-detect is documented; no mutable defaults); no Cat 5 orphan API (public surface = open_geotiff/to_geotiff/SUPPORTED_FEATURES/errors per __all__; plot_geotiff deprecated WITH DeprecationWarning; tests/parity/test_signature_contract.py + unit/test_signatures.py already pin docstring/signature parity). LOW documented, not fixed: examples/user_guide/52_COG_Overview_Generation.ipynb imports private _header.parse_header/parse_all_ifds (no public IFD-inspection API exists). Cross-cutting documented, not filed per template: chunks (geotiff, xarray/rioxarray convention) vs chunk_size (reproject); default_name (geotiff, rioxarray-compat) vs name (reproject) is by design. cuda-validated: CUDA_AVAILABLE=True; gpu=True and gpu=True+chunks smoke-tested on open_geotiff/to_geotiff with kwarg parity and CPU/GPU pixel parity." -hydro-d8,2026-05-29,2709,HIGH,1;5,"Sweep 2026-05-29 (deep-sweep-api-consistency-hydro-d8-2026-05-29). Scope = the 13 D8-variant files only; dinf/mfd read for reference but not modified. 1 HIGH Cat 1 + 1 MEDIUM Cat 5 fixed in this branch (#2709, PR #2716). HIGH Cat 1: stream_order_d8 named its strahler/shreve selector `ordering` while sibling stream_order_dinf/stream_order_mfd use `method`; both names live in the public API and the __init__.py _StreamOrderDispatch special-cases the drift (translates ordering->method for non-d8). Fix adds `method` as an accepted alias on stream_order_d8 (case-insensitive; takes precedence; conflicting ordering+method raises ValueError), keeping `ordering` working so the out-of-scope dispatcher (passes ordering=) and existing callers are unaffected. Full rename to `method` deferred because deprecating `ordering` would warn on every stream_order(routing='d8') call via the dispatcher I cannot touch in this scope. MEDIUM Cat 5: basins_d8 (watershed_d8.py) is a backward-compat wrapper whose docstring said 'use basin instead' but emitted no warning; added DeprecationWarning(stacklevel=2). Tests added for alias parity/precedence/conflict/case-insensitivity and for the basins_d8 warning. Findings documented but NOT filed per template: (LOW Cat 1 cross-module, out of scope) dinf siblings name the first arg `flow_dir_dinf` (stream_link/flow_path/hand/watershed_dinf) while all D8 funcs use the cleaner `flow_dir`; D8 is the better convention so no D8 change -- the drift lives in the dinf files. (LOW Cat 4 defensive-validation drift) hand_d8 validates np.isfinite(threshold) but stream_link_d8/stream_order_d8 (same threshold: float = 100 param) do not; not user-facing signature surprise, document only. No Cat 2 return drift (every D8 public fn returns xr.DataArray with coords/dims/attrs preserved; Dataset in -> Dataset out via @supports_dataset). No Cat 3 missing-hints beyond fill_d8 z_limit (optional, no hint) which mirrors its sibling style. All 13 D8 funcs are re-exported in xrspatial/hydro/__init__.py (no orphan API). cuda-validated: CUDA_AVAILABLE=True on this host; method-alias parity smoke-tested on a cupy DataArray. CI: ubuntu/windows/3.12 GitHub Actions green; macOS-3.14 + ReadTheDocs slow but no failures. NOTE: the /review-pr review comment could not be posted to GitHub (auto-mode permission denial on gh pr review); review findings were applied to code instead (case-insensitive conflict check + str|None hint, commit f8467320)." -polygonize,2026-05-19,2148,HIGH,1;3,"Sweep 2026-05-19 (deep-sweep-api-consistency-polygonize-2026-05-19). 1 MEDIUM Cat 3 finding fixed in this branch (#2148): polygonize() was the only public vector/raster conversion function without a return type annotation. Sieve/contours/rasterize/clip_polygon all declare one. Fix adds a Union return annotation (numpy tuple | awkward tuple | geopandas GeoDataFrame | spatialpandas GeoDataFrame | geojson dict) using TYPE_CHECKING forward refs for optional deps, and expands the docstring Returns section to enumerate the per-return_type shapes. 1 HIGH Cat 1 finding NOT fixed in this PR -- cross-module rename: polygonize uses `connectivity` (int 4|8) while sieve uses `neighborhood` (int 4|8) for the identical rook/queen pixel-connectivity concept. Industry convention (GDAL, rasterio.features.sieve) favours `connectivity`; the deprecation shim belongs in sieve.py, not polygonize, so this is out of scope for the polygonize-scoped sweep branch. Documented here for the next sieve sweep pass. 1 LOW Cat 1 cross-cutting: polygonize/sieve/clip_polygon use `raster` while contours and many older modules use `agg` for the input DataArray -- library-wide drift, not filed per-module per sweep template. Cat 2 return-shape: polygonize returns tuple/GeoDataFrame/dict by return_type; consistent with contours' tuple/GeoDataFrame dispatch. No Cat 4 (no mutable defaults; connectivity=4 default matches sieve neighborhood=4 default). No Cat 5 (polygonize re-exported in xrspatial/__init__.py; no orphan API; no __all__ but consistent with module convention). cuda-validated: cupy backend accepts identical kwargs, smoke-tested with cupy DataArray on host with CUDA_AVAILABLE." -proximity,2026-06-09,3090;3091,HIGH,2;3,"Sweep 2026-06-09 (deep-sweep-api-consistency-proximity-2026-06-09). 1 HIGH Cat 2 finding (#3090): dask+numpy (and unbounded dask+cupy, which converts to it) KDTree path violates the documented lowest-flat-index tie-break in allocation()/direction() whenever the raster has >1 chunk column. _collect_region_targets concatenates targets chunk-major (iy outer, ix inner) so the tree's target order is not global row-major; _kdtree_query_lowest_index then ties to the wrong target. Existing tie-break tests put both targets in the same raster row where chunk order coincides with row-major, so they pass. Repro: 5x5, targets 2@(1,3) and 3@(2,2), chunks (5,3), pixel (2,3) tied at d=1 -> numpy gives 2, dask gives 3. Bounded map_overlap paths are fine (local row-major order is offset-invariant). 1 MEDIUM Cat 3 finding (#3091): all 3 public docstrings claim numpy + dask+numpy support only while cupy/dask+cupy backends exist, are dispatched, and are tested (the tie-break paragraphs in the same docstrings name all 4 backends); direction() opens with a stray copy-pasted slope line ('downward slope direction') plus a doubled 'the the'; allocation example output reads as float64 but the function returns float32; stale '# convert to have same type as of input @raster' comment. Within-module Cat 1/4/5 clean: proximity/allocation/direction share an identical signature (raster, x='x', y='y', target_values=None, max_distance=np.inf, distance_metric='EUCLIDEAN'); consistent with surface_distance siblings (raster/x/y/target_values/max_distance); all 6 public symbols (incl. euclidean/manhattan/great_circle_distance) re-exported in __init__.py, no orphan API. Cross-cutting, documented not filed: sibling distance modules (surface_distance, cost_distance, balanced_allocation) use mutable default target_values: list = [] while proximity uses the None sentinel - the mutable-default fix belongs to those modules; proximity's target_values: list = None hint would be more precise as Optional[list] (LOW, matches library style). cuda-validated: CUDA_AVAILABLE=True on this host; proximity/allocation/direction smoke-tested with identical kwargs on numpy, cupy, dask+numpy, dask+cupy (proximity parity passed; allocation/direction parity failure is finding #3090)." -rasterize,2026-06-09,3089,HIGH,1,"Sweep 2026-06-09 (deep-sweep-api-consistency-rasterize-2026-06-09). 1 HIGH Cat 1 fixed in this branch (#3089): rasterize(use_cuda=) vs open_geotiff(gpu=) named the identical GPU-backend opt-in differently; these are the only two public entry points with an explicit GPU boolean (no input array to dispatch on; both pair it with chunks= for dask) and both names were live in the public API at once. Fix renames the positional param to gpu (same slot, positional callers unaffected) and appends use_cuda=None as a deprecated alias: DeprecationWarning on use, TypeError when combined with gpu=True. Docstring, GPU merge warning text, CuPy ImportError text, and polygon_clip.py's internal dask+cupy caller updated (guarded so a legacy use_cuda in rasterize_kw does not collide with the new default); all rasterize test call sites migrated to gpu=; regression tests in test_rasterize_gpu_alias_3089.py pin slot position, warning, TypeError, backend parity, and the warning-free clip_polygon path. Re-inspection after the 2026-05-21 pass (#2250); prior cross-module notes (clip_polygon nodata vs fill, name default drift, polygonize column_name vs column) still documented-only. Docstring/signature parity verified programmatically (17/17 params, order matches). New params since last pass (check_crs, max_pixels) consistent with geotiff naming (max_pixels matches geotiff's). No Cat 2/4/5 findings. LOW noted, not fixed (other module's docs): docs/source/user_guide/focal.ipynb claims convolve_2d takes use_cuda, which it does not. cuda-validated: CUDA_AVAILABLE=True; numpy/cupy/dask+numpy/dask+cupy smoke-tested with identical kwargs, values equal." -reproject,2026-06-09,3095;3097,HIGH,1;2;3,"Sweep 2026-06-09 (deep-sweep-api-consistency-reproject-2026-06-09). 2 findings filed and fixed: #3095 -> PR #3125, #3097 -> PR #3134 (branches -01/-02 off this one). (HIGH Cat 2, #3095) merge() raises TypeError ('Implicit conversion to a NumPy array is not allowed') on cupy-backed inputs while sibling reproject() supports numpy/cupy/dask+numpy/dask+cupy; crash site _merge_inmemory info['raster'].values (__init__.py:2572); dask-of-cupy fails the same way at compute via _merge_block_adapter -> _reproject_chunk_numpy/np.asarray. _merge.py has a complete _merge_arrays_cupy that is imported in __init__.py:38 but never called (dead GPU plumbing; the unused import alone is lint issue #3083 from the style sweep). Fix: host round-trip on entry (same pattern as _apply_vertical_shift), GPU result out, docstring documents backend handling. (MEDIUM Cat 3, #3097) _vertical.py Returns docstrings claim 'same type as input/height' but geoid_height(DataArray) returns np.ndarray (verified empirically) and the four conversion wrappers return np.float64/np.ndarray; geoid_height converts scalars to Python float but the wrappers do not (sibling scalar-return drift). Docs-only fix. Documented but NOT fixed: (LOW Cat 1) itrf_transform(src=/tgt=) abbreviations vs source_/target_ elsewhere -- prior 2026-05-29 sweep already weighed this and left it as-is (frames, not CRSes); filed #3099 before noticing the prior disposition, then closed it as not-planned to avoid churn. (LOW Cat 5) module docstring 'Public API' section lists only reproject/merge while __all__ exports 10 names (vertical+itrf funcs invisible in help() header; docs/source/reference/reproject.rst autosummary likewise lists only reproject/merge). Cross-cutting, notes only per template: raster/rasters (reproject) vs agg (terrain family) vs source (geotiff); chunk_size (reproject/merge) vs chunks (open_geotiff); resampling+resolution (reproject/merge/accessor) vs method+target_resolution (resample.py -- resample is the outlier, belongs to a resample-module pass, already in resample row's notes). No Cat 4 default drift (resampling='bilinear'/transform_precision=16/chunk_size=None/bounds_policy='auto'/model='EGM96' consistent across siblings). reproject()/merge() kwarg parity smoke-tested on numpy AND cupy DataArrays (merge cupy crash found exactly there). cuda-validated: CUDA_AVAILABLE=True on this host. CI: all GitHub Actions checks green on both PRs; RTD flapped (pending on #3125, fail on #3134 -- repo-wide backlog, change not docs-rendered); PRs left BLOCKED on REVIEW_REQUIRED for the user to merge." -resample,2026-05-27,2544,MEDIUM,3,"Sweep 2026-05-27 (deep-sweep-api-consistency-resample-2026-05-27). 1 MEDIUM Cat 3 finding fixed in this branch (#2544): resample() was the only public symbol in xrspatial.resample without type annotations on any parameter or return; siblings slope/aspect/hillshade/curvature all annotate `agg: xr.DataArray` and `-> xr.DataArray`. Fix adds annotations matching the docstring (agg: xr.DataArray; scale_factor / target_resolution: float | tuple[float, float] | None; method: str; nodata: float | None; name: str) and a `-> xr.DataArray` return type, plus a docstring note that the @supports_dataset decorator accepts Dataset too. Regression test test_resample_signature_annot_2544.py pins every param and the return annotation. Other findings documented but not filed per template: (MEDIUM Cat 1 cross-module) `method` (resample) vs `resampling` (reproject/merge) -- same conceptual parameter, different name, cross-cutting rename, needs design issue. (LOW Cat 1 cross-cutting) first-arg `agg` (resample/slope/aspect/...) vs `raster` (reproject/rasterize/polygonize/sieve) -- library-wide drift, not per-module. (LOW Cat 5) ALL_METHODS imported by tests but not in __all__ (module has no __all__); borderline orphan but used for test parametrisation only. No Cat 2 (returns xr.DataArray as documented). No Cat 4 mutable defaults. resample is exported in xrspatial/__init__.py. cuda-validated: cupy backend smoke-tested with nearest, bilinear, and average on host with CUDA_AVAILABLE=True." -slope,2026-05-29,2681,MEDIUM,3,"Sweep 2026-05-29 (deep-sweep-api-consistency-slope-2026-05-29). 1 MEDIUM Cat 3 finding fixed in this branch (#2681, PR #2687): slope() annotated name as `str` while every terrain-family sibling (aspect/northness/eastness in aspect.py, curvature in curvature.py) uses Optional[str]. name flows into xr.DataArray(name=name) which accepts None, so slope(agg, name=None) already worked at runtime -- the annotation was just wrong and inconsistent. Fix widens to Optional[str] and imports Optional (module previously imported only Union). Non-breaking (type-hint widening), no deprecation shim. Added test_name_annotation_matches_terrain_family (pins parity vs the 4 siblings via get_type_hints, unwrapping @supports_dataset) and test_name_none_accepted (slope(agg, name=None).name is None). Full test_slope.py passes (43). No backend logic touched -- numpy/cupy/dask+numpy/dask+cupy paths unchanged; public signature is shared across backends via ArrayTypeFunctionMapping. Other categories: no Cat 1 in-module rename (slope/aspect share identical public param names agg/name/method/z_unit/boundary); no Cat 2 return drift (returns xr.DataArray/Dataset via @supports_dataset, same coords/dims/attrs convention as siblings); no Cat 4 default drift (name/method='planar'/z_unit='meter'/boundary='nan' match across the family); no Cat 5 orphan API (slope re-exported in __init__.py, documented, no __all__ but consistent with module convention). Cross-cutting (documented, not filed per template): first-arg `agg` (slope/aspect/curvature) vs `raster` (reproject/rasterize/polygonize) is library-wide drift. cuda-validated: CUDA_AVAILABLE=True on this host; cupy slope smoke-tested (planar) and signature parity confirmed between numpy and cupy entry points." -zonal,2026-06-10,3188,MEDIUM,1;3;5,"Re-sweep 2026-06-10 (deep-sweep-api-consistency-zonal-2026-06-10). Prior sweep's HIGH zones_ids/zone_ids typo confirmed already fixed on main (#2521). Several previously-documented MEDIUM Cat 3 items also fixed on main since 2026-05-27: crosstab layer docstring now says default=None; hypsometric_integral now has param+return annotations; apply now has -> xr.DataArray. Two remaining safe Cat 3 fixes filed+PR'd this run (issue #3188 / PR #3196): (1) crosstab zone_ids/cat_ids annotated List[...]=None -> wrapped in Optional[...] to match stats()/crop(); (2) crosstab nodata_values docstring said 'Cells with nodata' (copy-paste from apply) -> now references nodata_values. Non-breaking, 17 crosstab tests pass. Documented-not-fixed: (MEDIUM Cat 1) nodata vs nodata_values drift across stats/crosstab (nodata_values, default None, filters VALUES raster) vs apply/hypsometric_integral (nodata, default 0, filters ZONES raster) -- names differ but so do the concepts and defaults, so a blanket rename would conflate two distinct meanings; needs a design decision, not a mechanical shim. (MEDIUM Cat 5) get_full_extent has a public-style docstring+example but is not in __init__.py -- borderline orphan, minor utility, left as-is. (LOW Cat 3) crop() lacks a return type annotation while stats/crosstab/apply/regions/trim annotate theirs. Cross-cutting (not filed): first-arg name varies (stats/crosstab/crop use zones; regions/trim use raster) but regions/trim operate on the raster itself so the name matches the role; library-wide agg vs raster vs values naming spans 20+ modules, out of per-module scope. cuda-validated: CUDA_AVAILABLE=True on this host." +module,last_inspected,issue,severity_max,categories_found,notes +focal,2026-05-29,2689,HIGH,1;2;3;4,"Sweep 2026-05-29 (deep-sweep-api-consistency-focal-2026-05-29). Fixed in PR #2699 (issue #2689): (HIGH Cat 1) first-arg drift raster vs agg -- apply()/hotspots() took `raster` while mean()/focal_stats() and the rest of the library (curvature/slope/aspect/hillshade/classify) take `agg`; both names live in the public API at once. Renamed apply/hotspots first arg to `agg` with a keyword-only deprecation shim (raster=None): old keyword still accepted, emits DeprecationWarning, passing both raises TypeError, positional callers untouched. (MEDIUM Cat 1+5) name= param missing on focal_stats/hotspots while mean/apply have one -- added name='focal_stats'/'hotspots'. (MEDIUM Cat 2) focal_stats output .name was inconsistent across backends (numpy leaked internal 'focal_apply', cupy returned None) -- now set consistently on numpy/cupy/dask+numpy/dask+cupy via result.name=name. (MEDIUM Cat 3) mean() docstring omitted the `excludes` param -- documented. (MEDIUM Cat 4) mutable list defaults excludes=[np.nan] and stats_funcs=[...] replaced with None sentinels. Tests: deprecation warnings, both-args TypeError, name= parity across backends incl GPU variants, default-value isolation. Documented but NOT filed per template: (LOW Cat 3) none of the focal public funcs have type hints while sibling curvature does -- library-wide gap, not per-module. (LOW cross-cutting) apply/hotspots default func vs ngjit-vs-cuda.jit constraint for cupy backend is documented in the docstring, not a consistency bug. No Cat 5 orphan API (apply/focal_stats/hotspots consumed via `from xrspatial.focal import ...` and documented in focal.rst autosummary; mean re-exported in __init__). cuda-validated: CUDA_AVAILABLE=True on this host; cupy + dask+cupy entry points smoke-tested for name= and signature parity before opening the PR." +geotiff,2026-06-09,3086,MEDIUM,3,"Sweep 2026-06-09 (deep-sweep-api-consistency-geotiff-2026-06-09). 1 MEDIUM Cat 3 finding fixed in this branch (#3086): after the mask_and_scale -> unpack rename (#3071/PR #3075), user-facing strings still presented the deprecated alias as canonical: to_geotiff's pack docstring said 'Inverse of open_geotiff(mask_and_scale=True)', and errors on the public unpack=True/pack=True paths named the alias (MalformedScaleOffsetError x2 and the MixedBandMetadataError remedy 'or drop mask_and_scale' in _attrs.py; the _pack ValueError 'not produced by open_geotiff(mask_and_scale=True)'; the to_geotiff pack ValueError in _writers/eager.py). Fix updates the docstring + error strings to name unpack. Intentionally unchanged: internal backend kwargs still named mask_and_scale (private modules) and the attrs key mask_and_scale_dtype (contract v5). Regression tests pin that the messages name unpack and not the alias (tests/read/test_rioxarray_compat_2961.py #3086 section; tests/write/test_pack_3064.py). Other categories: no Cat 1 in-module drift (reader/writer share gpu/allow_* names; masked/default_name/unpack follow rioxarray open_rasterio deliberately, with deprecated aliases mask_nodata/name/mask_and_scale that warn); no Cat 2 (open_geotiff -> DataArray, to_geotiff returns path, both documented); no Cat 4 (reader gpu=False vs writer gpu=None auto-detect is documented; no mutable defaults); no Cat 5 orphan API (public surface = open_geotiff/to_geotiff/SUPPORTED_FEATURES/errors per __all__; plot_geotiff deprecated WITH DeprecationWarning; tests/parity/test_signature_contract.py + unit/test_signatures.py already pin docstring/signature parity). LOW documented, not fixed: examples/user_guide/52_COG_Overview_Generation.ipynb imports private _header.parse_header/parse_all_ifds (no public IFD-inspection API exists). Cross-cutting documented, not filed per template: chunks (geotiff, xarray/rioxarray convention) vs chunk_size (reproject); default_name (geotiff, rioxarray-compat) vs name (reproject) is by design. cuda-validated: CUDA_AVAILABLE=True; gpu=True and gpu=True+chunks smoke-tested on open_geotiff/to_geotiff with kwarg parity and CPU/GPU pixel parity." +hydro-d8,2026-05-29,2709,HIGH,1;5,"Sweep 2026-05-29 (deep-sweep-api-consistency-hydro-d8-2026-05-29). Scope = the 13 D8-variant files only; dinf/mfd read for reference but not modified. 1 HIGH Cat 1 + 1 MEDIUM Cat 5 fixed in this branch (#2709, PR #2716). HIGH Cat 1: stream_order_d8 named its strahler/shreve selector `ordering` while sibling stream_order_dinf/stream_order_mfd use `method`; both names live in the public API and the __init__.py _StreamOrderDispatch special-cases the drift (translates ordering->method for non-d8). Fix adds `method` as an accepted alias on stream_order_d8 (case-insensitive; takes precedence; conflicting ordering+method raises ValueError), keeping `ordering` working so the out-of-scope dispatcher (passes ordering=) and existing callers are unaffected. Full rename to `method` deferred because deprecating `ordering` would warn on every stream_order(routing='d8') call via the dispatcher I cannot touch in this scope. MEDIUM Cat 5: basins_d8 (watershed_d8.py) is a backward-compat wrapper whose docstring said 'use basin instead' but emitted no warning; added DeprecationWarning(stacklevel=2). Tests added for alias parity/precedence/conflict/case-insensitivity and for the basins_d8 warning. Findings documented but NOT filed per template: (LOW Cat 1 cross-module, out of scope) dinf siblings name the first arg `flow_dir_dinf` (stream_link/flow_path/hand/watershed_dinf) while all D8 funcs use the cleaner `flow_dir`; D8 is the better convention so no D8 change -- the drift lives in the dinf files. (LOW Cat 4 defensive-validation drift) hand_d8 validates np.isfinite(threshold) but stream_link_d8/stream_order_d8 (same threshold: float = 100 param) do not; not user-facing signature surprise, document only. No Cat 2 return drift (every D8 public fn returns xr.DataArray with coords/dims/attrs preserved; Dataset in -> Dataset out via @supports_dataset). No Cat 3 missing-hints beyond fill_d8 z_limit (optional, no hint) which mirrors its sibling style. All 13 D8 funcs are re-exported in xrspatial/hydro/__init__.py (no orphan API). cuda-validated: CUDA_AVAILABLE=True on this host; method-alias parity smoke-tested on a cupy DataArray. CI: ubuntu/windows/3.12 GitHub Actions green; macOS-3.14 + ReadTheDocs slow but no failures. NOTE: the /review-pr review comment could not be posted to GitHub (auto-mode permission denial on gh pr review); review findings were applied to code instead (case-insensitive conflict check + str|None hint, commit f8467320)." +polygonize,2026-05-19,2148,HIGH,1;3,"Sweep 2026-05-19 (deep-sweep-api-consistency-polygonize-2026-05-19). 1 MEDIUM Cat 3 finding fixed in this branch (#2148): polygonize() was the only public vector/raster conversion function without a return type annotation. Sieve/contours/rasterize/clip_polygon all declare one. Fix adds a Union return annotation (numpy tuple | awkward tuple | geopandas GeoDataFrame | spatialpandas GeoDataFrame | geojson dict) using TYPE_CHECKING forward refs for optional deps, and expands the docstring Returns section to enumerate the per-return_type shapes. 1 HIGH Cat 1 finding NOT fixed in this PR -- cross-module rename: polygonize uses `connectivity` (int 4|8) while sieve uses `neighborhood` (int 4|8) for the identical rook/queen pixel-connectivity concept. Industry convention (GDAL, rasterio.features.sieve) favours `connectivity`; the deprecation shim belongs in sieve.py, not polygonize, so this is out of scope for the polygonize-scoped sweep branch. Documented here for the next sieve sweep pass. 1 LOW Cat 1 cross-cutting: polygonize/sieve/clip_polygon use `raster` while contours and many older modules use `agg` for the input DataArray -- library-wide drift, not filed per-module per sweep template. Cat 2 return-shape: polygonize returns tuple/GeoDataFrame/dict by return_type; consistent with contours' tuple/GeoDataFrame dispatch. No Cat 4 (no mutable defaults; connectivity=4 default matches sieve neighborhood=4 default). No Cat 5 (polygonize re-exported in xrspatial/__init__.py; no orphan API; no __all__ but consistent with module convention). cuda-validated: cupy backend accepts identical kwargs, smoke-tested with cupy DataArray on host with CUDA_AVAILABLE." +proximity,2026-06-09,3090;3091,HIGH,2;3,"Sweep 2026-06-09 (deep-sweep-api-consistency-proximity-2026-06-09). 1 HIGH Cat 2 finding (#3090): dask+numpy (and unbounded dask+cupy, which converts to it) KDTree path violates the documented lowest-flat-index tie-break in allocation()/direction() whenever the raster has >1 chunk column. _collect_region_targets concatenates targets chunk-major (iy outer, ix inner) so the tree's target order is not global row-major; _kdtree_query_lowest_index then ties to the wrong target. Existing tie-break tests put both targets in the same raster row where chunk order coincides with row-major, so they pass. Repro: 5x5, targets 2@(1,3) and 3@(2,2), chunks (5,3), pixel (2,3) tied at d=1 -> numpy gives 2, dask gives 3. Bounded map_overlap paths are fine (local row-major order is offset-invariant). 1 MEDIUM Cat 3 finding (#3091): all 3 public docstrings claim numpy + dask+numpy support only while cupy/dask+cupy backends exist, are dispatched, and are tested (the tie-break paragraphs in the same docstrings name all 4 backends); direction() opens with a stray copy-pasted slope line ('downward slope direction') plus a doubled 'the the'; allocation example output reads as float64 but the function returns float32; stale '# convert to have same type as of input @raster' comment. Within-module Cat 1/4/5 clean: proximity/allocation/direction share an identical signature (raster, x='x', y='y', target_values=None, max_distance=np.inf, distance_metric='EUCLIDEAN'); consistent with surface_distance siblings (raster/x/y/target_values/max_distance); all 6 public symbols (incl. euclidean/manhattan/great_circle_distance) re-exported in __init__.py, no orphan API. Cross-cutting, documented not filed: sibling distance modules (surface_distance, cost_distance, balanced_allocation) use mutable default target_values: list = [] while proximity uses the None sentinel - the mutable-default fix belongs to those modules; proximity's target_values: list = None hint would be more precise as Optional[list] (LOW, matches library style). cuda-validated: CUDA_AVAILABLE=True on this host; proximity/allocation/direction smoke-tested with identical kwargs on numpy, cupy, dask+numpy, dask+cupy (proximity parity passed; allocation/direction parity failure is finding #3090)." +rasterize,2026-06-09,3089,HIGH,1,"Sweep 2026-06-09 (deep-sweep-api-consistency-rasterize-2026-06-09). 1 HIGH Cat 1 fixed in this branch (#3089): rasterize(use_cuda=) vs open_geotiff(gpu=) named the identical GPU-backend opt-in differently; these are the only two public entry points with an explicit GPU boolean (no input array to dispatch on; both pair it with chunks= for dask) and both names were live in the public API at once. Fix renames the positional param to gpu (same slot, positional callers unaffected) and appends use_cuda=None as a deprecated alias: DeprecationWarning on use, TypeError when combined with gpu=True. Docstring, GPU merge warning text, CuPy ImportError text, and polygon_clip.py's internal dask+cupy caller updated (guarded so a legacy use_cuda in rasterize_kw does not collide with the new default); all rasterize test call sites migrated to gpu=; regression tests in test_rasterize_gpu_alias_3089.py pin slot position, warning, TypeError, backend parity, and the warning-free clip_polygon path. Re-inspection after the 2026-05-21 pass (#2250); prior cross-module notes (clip_polygon nodata vs fill, name default drift, polygonize column_name vs column) still documented-only. Docstring/signature parity verified programmatically (17/17 params, order matches). New params since last pass (check_crs, max_pixels) consistent with geotiff naming (max_pixels matches geotiff's). No Cat 2/4/5 findings. LOW noted, not fixed (other module's docs): docs/source/user_guide/focal.ipynb claims convolve_2d takes use_cuda, which it does not. cuda-validated: CUDA_AVAILABLE=True; numpy/cupy/dask+numpy/dask+cupy smoke-tested with identical kwargs, values equal." +reproject,2026-06-09,3095;3097,HIGH,1;2;3,"Sweep 2026-06-09 (deep-sweep-api-consistency-reproject-2026-06-09). 2 findings filed and fixed: #3095 -> PR #3125, #3097 -> PR #3134 (branches -01/-02 off this one). (HIGH Cat 2, #3095) merge() raises TypeError ('Implicit conversion to a NumPy array is not allowed') on cupy-backed inputs while sibling reproject() supports numpy/cupy/dask+numpy/dask+cupy; crash site _merge_inmemory info['raster'].values (__init__.py:2572); dask-of-cupy fails the same way at compute via _merge_block_adapter -> _reproject_chunk_numpy/np.asarray. _merge.py has a complete _merge_arrays_cupy that is imported in __init__.py:38 but never called (dead GPU plumbing; the unused import alone is lint issue #3083 from the style sweep). Fix: host round-trip on entry (same pattern as _apply_vertical_shift), GPU result out, docstring documents backend handling. (MEDIUM Cat 3, #3097) _vertical.py Returns docstrings claim 'same type as input/height' but geoid_height(DataArray) returns np.ndarray (verified empirically) and the four conversion wrappers return np.float64/np.ndarray; geoid_height converts scalars to Python float but the wrappers do not (sibling scalar-return drift). Docs-only fix. Documented but NOT fixed: (LOW Cat 1) itrf_transform(src=/tgt=) abbreviations vs source_/target_ elsewhere -- prior 2026-05-29 sweep already weighed this and left it as-is (frames, not CRSes); filed #3099 before noticing the prior disposition, then closed it as not-planned to avoid churn. (LOW Cat 5) module docstring 'Public API' section lists only reproject/merge while __all__ exports 10 names (vertical+itrf funcs invisible in help() header; docs/source/reference/reproject.rst autosummary likewise lists only reproject/merge). Cross-cutting, notes only per template: raster/rasters (reproject) vs agg (terrain family) vs source (geotiff); chunk_size (reproject/merge) vs chunks (open_geotiff); resampling+resolution (reproject/merge/accessor) vs method+target_resolution (resample.py -- resample is the outlier, belongs to a resample-module pass, already in resample row's notes). No Cat 4 default drift (resampling='bilinear'/transform_precision=16/chunk_size=None/bounds_policy='auto'/model='EGM96' consistent across siblings). reproject()/merge() kwarg parity smoke-tested on numpy AND cupy DataArrays (merge cupy crash found exactly there). cuda-validated: CUDA_AVAILABLE=True on this host. CI: all GitHub Actions checks green on both PRs; RTD flapped (pending on #3125, fail on #3134 -- repo-wide backlog, change not docs-rendered); PRs left BLOCKED on REVIEW_REQUIRED for the user to merge." +resample,2026-05-27,2544,MEDIUM,3,"Sweep 2026-05-27 (deep-sweep-api-consistency-resample-2026-05-27). 1 MEDIUM Cat 3 finding fixed in this branch (#2544): resample() was the only public symbol in xrspatial.resample without type annotations on any parameter or return; siblings slope/aspect/hillshade/curvature all annotate `agg: xr.DataArray` and `-> xr.DataArray`. Fix adds annotations matching the docstring (agg: xr.DataArray; scale_factor / target_resolution: float | tuple[float, float] | None; method: str; nodata: float | None; name: str) and a `-> xr.DataArray` return type, plus a docstring note that the @supports_dataset decorator accepts Dataset too. Regression test test_resample_signature_annot_2544.py pins every param and the return annotation. Other findings documented but not filed per template: (MEDIUM Cat 1 cross-module) `method` (resample) vs `resampling` (reproject/merge) -- same conceptual parameter, different name, cross-cutting rename, needs design issue. (LOW Cat 1 cross-cutting) first-arg `agg` (resample/slope/aspect/...) vs `raster` (reproject/rasterize/polygonize/sieve) -- library-wide drift, not per-module. (LOW Cat 5) ALL_METHODS imported by tests but not in __all__ (module has no __all__); borderline orphan but used for test parametrisation only. No Cat 2 (returns xr.DataArray as documented). No Cat 4 mutable defaults. resample is exported in xrspatial/__init__.py. cuda-validated: cupy backend smoke-tested with nearest, bilinear, and average on host with CUDA_AVAILABLE=True." +slope,2026-05-29,2681,MEDIUM,3,"Sweep 2026-05-29 (deep-sweep-api-consistency-slope-2026-05-29). 1 MEDIUM Cat 3 finding fixed in this branch (#2681, PR #2687): slope() annotated name as `str` while every terrain-family sibling (aspect/northness/eastness in aspect.py, curvature in curvature.py) uses Optional[str]. name flows into xr.DataArray(name=name) which accepts None, so slope(agg, name=None) already worked at runtime -- the annotation was just wrong and inconsistent. Fix widens to Optional[str] and imports Optional (module previously imported only Union). Non-breaking (type-hint widening), no deprecation shim. Added test_name_annotation_matches_terrain_family (pins parity vs the 4 siblings via get_type_hints, unwrapping @supports_dataset) and test_name_none_accepted (slope(agg, name=None).name is None). Full test_slope.py passes (43). No backend logic touched -- numpy/cupy/dask+numpy/dask+cupy paths unchanged; public signature is shared across backends via ArrayTypeFunctionMapping. Other categories: no Cat 1 in-module rename (slope/aspect share identical public param names agg/name/method/z_unit/boundary); no Cat 2 return drift (returns xr.DataArray/Dataset via @supports_dataset, same coords/dims/attrs convention as siblings); no Cat 4 default drift (name/method='planar'/z_unit='meter'/boundary='nan' match across the family); no Cat 5 orphan API (slope re-exported in __init__.py, documented, no __all__ but consistent with module convention). Cross-cutting (documented, not filed per template): first-arg `agg` (slope/aspect/curvature) vs `raster` (reproject/rasterize/polygonize) is library-wide drift. cuda-validated: CUDA_AVAILABLE=True on this host; cupy slope smoke-tested (planar) and signature parity confirmed between numpy and cupy entry points." +zonal,2026-06-10,3188,MEDIUM,1;3;5,"Re-sweep 2026-06-10 (deep-sweep-api-consistency-zonal-2026-06-10). Prior sweep's HIGH zones_ids/zone_ids typo confirmed already fixed on main (#2521). Several previously-documented MEDIUM Cat 3 items also fixed on main since 2026-05-27: crosstab layer docstring now says default=None; hypsometric_integral now has param+return annotations; apply now has -> xr.DataArray. Two remaining safe Cat 3 fixes filed+PR'd this run (issue #3188 / PR #3196): (1) crosstab zone_ids/cat_ids annotated List[...]=None -> wrapped in Optional[...] to match stats()/crop(); (2) crosstab nodata_values docstring said 'Cells with nodata' (copy-paste from apply) -> now references nodata_values. Non-breaking, 17 crosstab tests pass. Documented-not-fixed: (MEDIUM Cat 1) nodata vs nodata_values drift across stats/crosstab (nodata_values, default None, filters VALUES raster) vs apply/hypsometric_integral (nodata, default 0, filters ZONES raster) -- names differ but so do the concepts and defaults, so a blanket rename would conflate two distinct meanings; needs a design decision, not a mechanical shim. (MEDIUM Cat 5) get_full_extent has a public-style docstring+example but is not in __init__.py -- borderline orphan, minor utility, left as-is. (LOW Cat 3) crop() lacks a return type annotation while stats/crosstab/apply/regions/trim annotate theirs. Cross-cutting (not filed): first-arg name varies (stats/crosstab/crop use zones; regions/trim use raster) but regions/trim operate on the raster itself so the name matches the role; library-wide agg vs raster vs values naming spans 20+ modules, out of per-module scope. cuda-validated: CUDA_AVAILABLE=True on this host." diff --git a/xrspatial/reproject/_vertical.py b/xrspatial/reproject/_vertical.py index f569f98a8..01c344e7e 100644 --- a/xrspatial/reproject/_vertical.py +++ b/xrspatial/reproject/_vertical.py @@ -212,9 +212,13 @@ def geoid_height(lon, lat, model='EGM96'): Returns ------- - N : same type as input + N : float or numpy.ndarray Geoid undulation in metres. Positive means the geoid is above - the ellipsoid. + the ellipsoid. A Python ``float`` when both *lon* and *lat* are + scalars; otherwise a ``numpy.ndarray`` with the same shape as + the inputs. Array-like and ``xr.DataArray`` inputs both come + back as a plain ndarray (coords and attrs are not carried + through). Examples -------- @@ -330,8 +334,10 @@ def ellipsoidal_to_orthometric(height, lon, lat, model='EGM96'): Returns ------- - H : same type as height - Orthometric height in metres. + H : numpy.ndarray or numpy scalar + Orthometric height in metres. The input is passed through + ``np.asarray``, so scalar input returns a numpy scalar and + array-like or ``xr.DataArray`` input returns a plain ndarray. """ N = geoid_height(lon, lat, model) return np.asarray(height) - N @@ -353,8 +359,10 @@ def orthometric_to_ellipsoidal(height, lon, lat, model='EGM96'): Returns ------- - h : same type as height - Ellipsoidal height in metres. + h : numpy.ndarray or numpy scalar + Ellipsoidal height in metres. The input is passed through + ``np.asarray``, so scalar input returns a numpy scalar and + array-like or ``xr.DataArray`` input returns a plain ndarray. """ N = geoid_height(lon, lat, model) return np.asarray(height) + N @@ -378,8 +386,11 @@ def depth_to_ellipsoidal(depth, lon, lat, model='EGM96'): Returns ------- - h : same type as depth - Ellipsoidal height in metres (negative below ellipsoid). + h : numpy.ndarray or numpy scalar + Ellipsoidal height in metres (negative below ellipsoid). The + input is passed through ``np.asarray``, so scalar input returns + a numpy scalar and array-like or ``xr.DataArray`` input returns + a plain ndarray. """ N = geoid_height(lon, lat, model) return -np.asarray(depth) + N @@ -403,8 +414,11 @@ def ellipsoidal_to_depth(height, lon, lat, model='EGM96'): Returns ------- - depth : same type as height - Depth below chart datum in metres (positive downward). + depth : numpy.ndarray or numpy scalar + Depth below chart datum in metres (positive downward). The + input is passed through ``np.asarray``, so scalar input returns + a numpy scalar and array-like or ``xr.DataArray`` input returns + a plain ndarray. """ N = geoid_height(lon, lat, model) return N - np.asarray(height) diff --git a/xrspatial/tests/test_reproject.py b/xrspatial/tests/test_reproject.py index fc2b8de57..29dd2d854 100644 --- a/xrspatial/tests/test_reproject.py +++ b/xrspatial/tests/test_reproject.py @@ -7285,6 +7285,51 @@ def test_source_coords_match_pyproj_for_osgb36(self): assert np.all(np.abs(src_y) > 1000.0) +class TestVerticalReturnTypes: + """Pin the return types the _vertical.py docstrings describe (#3097). + + The Returns sections used to claim "same type as input", which was + wrong for DataArray input (plain ndarray comes back) and for scalar + input to the conversion wrappers (numpy scalar, not Python float). + These tests pin the actual behaviour the docs now state. + """ + + def test_geoid_height_scalar_returns_python_float(self): + from xrspatial.reproject import geoid_height + out = geoid_height(-74.0, 40.7) + assert type(out) is float + + def test_geoid_height_array_returns_ndarray(self): + from xrspatial.reproject import geoid_height + out = geoid_height(np.array([-74.0, 0.0]), np.array([40.7, 0.0])) + assert type(out) is np.ndarray + assert out.shape == (2,) + + def test_geoid_height_dataarray_returns_ndarray(self): + from xrspatial.reproject import geoid_height + lon = xr.DataArray(np.array([-74.0, 0.0])) + lat = xr.DataArray(np.array([40.7, 0.0])) + out = geoid_height(lon, lat) + # Documented: DataArray input comes back as a plain ndarray. + assert type(out) is np.ndarray + + def test_conversion_wrappers_return_numpy_types(self): + from xrspatial.reproject import ( + depth_to_ellipsoidal, + ellipsoidal_to_depth, + ellipsoidal_to_orthometric, + orthometric_to_ellipsoidal, + ) + for func in (ellipsoidal_to_orthometric, orthometric_to_ellipsoidal, + depth_to_ellipsoidal, ellipsoidal_to_depth): + scalar_out = func(100.0, -74.0, 40.7) + assert isinstance(scalar_out, np.floating), func.__name__ + arr_out = func(np.array([100.0, 50.0]), + np.array([-74.0, 0.0]), np.array([40.7, 0.0])) + assert type(arr_out) is np.ndarray, func.__name__ + assert arr_out.shape == (2,), func.__name__ + + @pytest.mark.skipif(not HAS_CUPY, reason="cupy required") class TestMergeCupyBackends: """merge() accepts GPU-backed inputs and returns a GPU mosaic (#3095).