Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/sweep-performance-state.csv
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,6 @@ surface_distance,2026-03-31T18:00:00Z,SAFE,memory-bound,0,1128,Memory guard adde
terrain,2026-03-31T18:00:00Z,RISKY,compute-bound,0,,
terrain_metrics,2026-03-31T18:00:00Z,SAFE,memory-bound,0,,
viewshed,2026-04-05T12:00:00Z,SAFE,memory-bound,0,fixed-in-tree,Tier B memory estimate tightened from 280 to 368 bytes/pixel (accounts for lexsort double-alloc + computed raster). astype copy=False avoids needless float64 copy.
visibility,2026-04-16T12:00:00Z,SAFE,memory-bound,0,fixed-in-tree,"Re-audit after Numba-ize (PR 1177) confirms SAFE. @ngjit kernels clean, type-stable. MEDIUM: K-observer graph growth in cumulative_viewshed (recommend periodic persist)."
visibility,2026-06-10,RISKY,compute-bound,0,3185,"cumulative_viewshed recomputed dask source per observer (fixed #3185: materialise once when no max_distance); graph grows ~64 tasks/observer with N; line_of_sight single-transect cheap; MEDIUM count temp .astype per observer (LOW, not fixed)"
worley,2026-03-31T18:00:00Z,SAFE,compute-bound,0,,
zonal,2026-05-27,SAFE,compute-bound,0,2526,"Pass 2 (2026-05-27): re-audit identified 3 MEDIUM findings. (1) zonal_apply 3D dask path: da.stack(layers, axis=2) left output chunks at size 1 along axis 2 -- filed #2526 and fixed by rechunking back to values_data.chunks[2] in _apply_dask_numpy (zonal.py:1691) and _apply_dask_cupy (zonal.py:1731). Confirmed via graph probe: 256x256 raster chunks=(64,64) 3 bands previously yielded chunks[2]=(1,1,1); now (3,). 1 new test (test_apply_dask_3d_axis2_rechunked_2526). 126 existing zonal tests pass. (2) _stats_cupy (zonal.py:588-608): per-zone x per-stat Python loop with cupy.float_(result) forces O(n_zones * n_stats) GPU<->CPU sync points; not fixed in this pass (CUDA-native rewrite needed, larger refactor). (3) _parallel_variance @delayed reduce iterates over all blocks in driver memory; for very large block counts the single-task merge becomes scheduler-bound but is not OOM since per-block arrays are O(n_zones). Not fixed (algorithmic refactor needed). Dask graph probe: stats(7 stats) on 2560x2560 chunks=256 -> 4449 tasks (44/chunk); stats(mean only) -> 823 tasks (8/chunk); crosstab -> 304 (3/chunk); hypsometric_integral -> 300 (3/chunk). All under 50K cap. SAFE/compute-bound verdict holds. | Fixed-in-tree 2026-04-16: rewrote hypsometric_integral dask path. Eliminated double-compute (_unique_finite_zones removed, each block discovers own zones). Replaced np.stack (O(n_blocks * n_zones) scheduler memory) with streaming dict-merge (O(n_zones)). 29 existing tests pass."
39 changes: 39 additions & 0 deletions xrspatial/tests/test_visibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,45 @@ def test_dask_matches_numpy(self):
result_dask = cumulative_viewshed(raster_dask, observers)
np.testing.assert_array_equal(result_np.values, result_dask.values)

def test_dask_source_computed_once(self):
"""No-max_distance dask path computes the source once, not per observer."""
H = W = 16
base = np.zeros((H, W), dtype=float)
counter = {'n': 0}

def _src(block_info=None):
counter['n'] += 1
return base.copy()

source = da.map_blocks(_src, chunks=((H,), (W,)), dtype=float,
meta=np.array(()))
raster = xr.DataArray(
source, dims=['y', 'x'],
coords={'y': np.arange(H, dtype=float),
'x': np.arange(W, dtype=float)},
)
observers = [{'x': float(i), 'y': float(i), 'observer_elev': 5}
for i in range(4)]
counter['n'] = 0
result = cumulative_viewshed(raster, observers)
# source materialised exactly once despite four observers
assert counter['n'] == 1
# output stays dask-backed to match the dask input
assert isinstance(result.data, da.Array)

def test_dask_per_observer_max_distance_stays_lazy(self):
"""A per-observer max_distance keeps the dask windowing path."""
data = np.zeros((20, 20), dtype=float)
raster_np = _make_raster(data)
raster_dask = raster_np.copy()
raster_dask.data = da.from_array(data, chunks=(8, 8))
observers = [{'x': 10.0, 'y': 10.0, 'observer_elev': 10,
'max_distance': 3}]
result = cumulative_viewshed(raster_dask, observers)
assert isinstance(result.data, da.Array)
result_np = cumulative_viewshed(raster_np, observers)
np.testing.assert_array_equal(result.values, result_np.values)

def test_default_output_name(self):
data = np.zeros((5, 5), dtype=float)
raster = _make_raster(data)
Expand Down
26 changes: 26 additions & 0 deletions xrspatial/visibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,9 +264,28 @@ def cumulative_viewshed(

# Detect dask backend to keep accumulation lazy
_is_dask = False
_input_dask_chunks = None
if has_dask_array():
import dask.array as da
_is_dask = isinstance(raster.data, da.Array)
if _is_dask:
_input_dask_chunks = raster.data.chunks

# When every observer takes the full-grid path (no max_distance anywhere),
# each viewshed() call would compute the same dask source independently,
# materialising it once per observer. Compute it a single time and run the
# observers against the in-memory raster instead. Observers that set a
# max_distance keep the dask backend so its windowing still loads only the
# relevant window per observer. The final result is re-wrapped as dask so
# the output backend still matches the dask input.
_materialised_dask = False
if _is_dask and max_distance is None and not any(
'max_distance' in obs for obs in observers):
materialised = raster.copy()
materialised.data = raster.data.compute()
raster = materialised
_is_dask = False
_materialised_dask = True

if _is_dask:
count = da.zeros(raster.shape, dtype=np.int32, chunks=raster.data.chunks)
Expand All @@ -288,6 +307,13 @@ def cumulative_viewshed(
vs_data = da.from_array(vs_data, chunks=raster.data.chunks)
count = count + (vs_data != INVISIBLE).astype(np.int32)

if _materialised_dask:
# Restore the dask-backed output type the dask input would have given.
# The count is already a concrete numpy array, so this wrap only
# changes the container type; no laziness is recovered and computing
# the result triggers no further source materialisation.
count = da.from_array(count, chunks=_input_dask_chunks)

result = xarray.DataArray(count, name=name, coords=raster.coords,
dims=raster.dims, attrs=raster.attrs)
if _is_dask:
Expand Down
Loading