xarray-contrib · brendancol · May 29, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/xrspatial/tests/test_zonal.py b/xrspatial/tests/test_zonal.py
@@ -626,9 +626,9 @@ def test_stats_all_nan_zone_preserved(backend):
         'min': [1.0, 3.0, 9.0, np.nan],
         'max': [5.0, 7.0, 10.0, np.nan],
         'mean': [(1.0 + 2.0 + 5.0) / 3, (3.0 + 4.0 + 7.0) / 3, 9.5, np.nan],
-        # numpy's _calc_stats leaves an all-NaN zone at NaN for every stat,
-        # including count (the func is only called when len(zone_values) > 0).
-        'count': [3, 3, 2, np.nan],
+        # An empty zone (zone 5, all NaN) reports count == 0 because the
+        # number of valid cells is zero. Other stats stay NaN (issue #2644).
+        'count': [3, 3, 2, 0],
     }
     check_results(backend, df_result, expected_result)
 
@@ -800,9 +800,10 @@ def test_zonal_stats_against_qgis(elevation_raster_no_nans, raster, qgis_zonal_s
 def test_stats_all_nan_zone(backend):
     """Zone where every value is NaN should not crash.
 
-    All backends now agree: the empty zone stays in the output with NaN
-    stats. The cupy path used to drop the zone (issue #2562) — that is
-    now fixed and the per-backend branching is gone.
+    All backends agree: the empty zone stays in the output. Its count is
+    0 (zero valid cells) while every other stat is NaN (issue #2644). The
+    cupy path used to drop the zone entirely (issue #2562); that is fixed
+    and the per-backend branching is gone.
     """
     if 'cupy' in backend and not has_cuda_and_cupy():
         pytest.skip("Requires CUDA and CuPy")
@@ -826,7 +827,7 @@ def test_stats_all_nan_zone(backend):
         'max':   [np.nan, 7.0],
         'min':   [np.nan, 5.0],
         'sum':   [np.nan, 12.0],
-        'count': [np.nan, 2],
+        'count': [0, 2],
     }
     check_results(backend, df_result, expected)
 
@@ -897,9 +898,11 @@ def test_stats_negative_zone_ids(backend):
 @pytest.mark.filterwarnings("ignore:Degrees of freedom:RuntimeWarning")
 @pytest.mark.parametrize("backend", ['numpy', 'dask+numpy', 'cupy', 'dask+cupy'])
 def test_stats_nodata_wipes_zone(backend):
-    """When nodata_values filters out every finite value in a zone, stats are NaN.
+    """When nodata_values filters out every finite value in a zone, the zone
+    is empty: count is 0 and the other stats are NaN.
 
-    All backends now agree after the issue #2562 fix.
+    All backends agree after the issue #2562 fix. The empty-zone count is 0
+    rather than NaN per issue #2644.
     """
     if 'cupy' in backend and not has_cuda_and_cupy():
         pytest.skip("Requires CUDA and CuPy")
@@ -923,7 +926,7 @@ def test_stats_nodata_wipes_zone(backend):
         'max':   [np.nan, 7.0],
         'min':   [np.nan, 3.0],
         'sum':   [np.nan, 10.0],
-        'count': [np.nan, 2],
+        'count': [0, 2],
     }
     check_results(backend, df_result, expected)
 

diff --git a/xrspatial/zonal.py b/xrspatial/zonal.py
@@ -223,11 +223,23 @@ def _nanreduce_preserve_allnan(blocks, func):
     return result
 
 
+def _count_reduce(blocks):
+    """Sum per-block counts. An empty zone (NaN in every block) totals 0.
+
+    Unlike the other reducers, count does not preserve all-NaN as NaN: an
+    empty zone has zero valid cells, so its count is 0, not undefined.
+    """
+    import warnings
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore', RuntimeWarning)
+        return np.nansum(blocks, axis=0)
+
+
 _DASK_STATS = dict(
     max=lambda blocks: _nanreduce_preserve_allnan(blocks, np.nanmax),
     min=lambda blocks: _nanreduce_preserve_allnan(blocks, np.nanmin),
     sum=lambda blocks: _nanreduce_preserve_allnan(blocks, np.nansum),
-    count=lambda blocks: _nanreduce_preserve_allnan(blocks, np.nansum),
+    count=_count_reduce,
     sum_squares=lambda blocks: _nanreduce_preserve_allnan(blocks, np.nansum),
 )
 
@@ -319,16 +331,28 @@ def _sort_and_stride(zones, values, unique_zones):
     return sorted_indices, values_by_zones, zone_breaks
 
 
+def _empty_zone_value(stat_name: str) -> float:
+    # 'count' is a cardinality: an empty zone has zero valid cells, so its
+    # count is 0. Every other built-in stat (and any custom callable) is
+    # undefined over no values, so it stays NaN.
+    return 0.0 if stat_name == 'count' else np.nan
+
+
 def _calc_stats(
     values_by_zones: np.array,
     zone_breaks: np.array,
     unique_zones: np.array,
     zone_ids: np.array,
     func: Callable,
     nodata_values: Union[int, float] = None,
+    empty_zone_value: float = np.nan,
 ):
+    # An "empty" zone exists in the zones raster but has no valid values
+    # (all NaN, or all equal to nodata_values). Most stats leave NaN there
+    # (empty_zone_value defaults to NaN), but count passes 0 because the
+    # count of no values is a cardinality of zero, not undefined.
     start = 0
-    results = np.full(unique_zones.shape, np.nan)
+    results = np.full(unique_zones.shape, empty_zone_value, dtype=np.float64)
     for i in range(len(unique_zones)):
         end = zone_breaks[i]
         if unique_zones[i] in zone_ids:
@@ -505,7 +529,8 @@ def _stats_numpy(
             func = stats_funcs.get(stats)
             stats_dict[stats] = _calc_stats(
                 values_by_zones, zone_breaks,
-                unique_zones, zone_ids, func, nodata_values
+                unique_zones, zone_ids, func, nodata_values,
+                empty_zone_value=_empty_zone_value(stats),
             )
             stats_dict[stats] = stats_dict[stats][selected_indexes]
         result = pd.DataFrame(stats_dict)
@@ -519,7 +544,8 @@ def _stats_numpy(
             func = stats_funcs.get(stats)
             stats_results = _calc_stats(
                 values_by_zones, zone_breaks,
-                unique_zones, zone_ids, func, nodata_values
+                unique_zones, zone_ids, func, nodata_values,
+                empty_zone_value=_empty_zone_value(stats),
             )
             for zone in zone_ids:
                 iz = zone_ids_map[zone]  # position of zone in unique_zones
@@ -604,7 +630,8 @@ def _stats_cupy(
 
         # extract zone_values, then filter per-zone for non-finite values
         # and the nodata sentinel. If the zone has no valid values left,
-        # emit NaN for every stat instead of dropping the zone.
+        # emit the empty-zone value for every stat instead of dropping the
+        # zone: 0 for count (a cardinality), NaN for everything else.
         zone_values = values_by_zone[unique_index[i]:unique_index[i]+unique_counts[i]]
         zone_mask = cupy.isfinite(zone_values)
         if nodata_values is not None:
@@ -613,7 +640,7 @@ def _stats_cupy(
 
         if zone_values.size == 0:
             for stats in stats_funcs:
-                stats_dict[stats].append(float('nan'))
+                stats_dict[stats].append(_empty_zone_value(stats))
             continue
 
         # apply stats on the zone data
@@ -741,6 +768,17 @@ def stats(
         Extra keyword arguments forwarded to ``rasterize()`` when
         *zones* is vector input (e.g. ``{'all_touched': True}``).
 
+    Notes
+    -----
+    Empty zones. A zone that exists in ``zones`` but has no valid values
+    (every cell is NaN, or every cell equals ``nodata_values``) still
+    appears as a row in the output. For such a zone, ``count`` is ``0``
+    because the number of valid cells is zero. Every other statistic
+    (``mean``, ``min``, ``max``, ``sum``, ``std``, ``var``, ``majority``,
+    and any custom callable) is ``NaN``, since those values are undefined
+    over an empty set. This holds across the numpy, cupy, and dask
+    backends.
+
     Returns
     -------
     stats_df : Union[pandas.DataFrame, dask.dataframe.DataFrame]