From 1da185aa0b56e1735cec206a4bb07174b925cdff Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 5 Jun 2026 07:13:33 -0700 Subject: [PATCH 1/6] Align open_geotiff params with rioxarray open_rasterio (#2961) Rename mask_nodata->masked (default flips True->False) and name->default_name with deprecation aliases; add mask_and_scale, parse_coordinates, lock, cache. mask_and_scale/parse_coordinates implemented on eager+dask CPU paths; gated for gpu/vrt. lock/cache accept+warn. --- xrspatial/geotiff/__init__.py | 191 +++++++++++++++++++++++----- xrspatial/geotiff/_attrs.py | 63 ++++++++- xrspatial/geotiff/_backends/dask.py | 39 +++++- xrspatial/geotiff/_runtime.py | 8 ++ 4 files changed, 258 insertions(+), 43 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 2e956d5aa..caa418812 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -81,7 +81,8 @@ from ._reader import _MAX_CLOUD_BYTES_SENTINEL, CloudSizeLimitError, UnsafeURLError from ._reader import read_to_array as _read_to_array from ._runtime import (_CRS_WKT_DEPRECATED_SENTINEL, _GPU_DEPRECATED_SENTINEL, # noqa: F401 - _MISSING_SOURCES_SENTINEL, _ON_GPU_FAILURE_SENTINEL, GeoTIFFFallbackWarning, + _MASK_NODATA_DEPRECATED_SENTINEL, _MISSING_SOURCES_SENTINEL, + _NAME_DEPRECATED_SENTINEL, _ON_GPU_FAILURE_SENTINEL, GeoTIFFFallbackWarning, _geotiff_strict_mode, _gpu_fallback_warning_message) from ._validation import (_validate_3d_writer_dims, _validate_chunks_arg, # noqa: F401 _validate_tile_size_arg) @@ -491,7 +492,8 @@ def open_geotiff(source: str | BinaryIO, *, bbox: tuple | None = None, overview_level: int | None = None, band: int | None = None, - name: str | None = None, + default_name: str | None = None, + name: str | None = _NAME_DEPRECATED_SENTINEL, # type: ignore[assignment] chunks: int | tuple | None = None, gpu: bool = False, max_pixels: int | None = None, @@ -506,7 +508,12 @@ def open_geotiff(source: str | BinaryIO, *, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, - mask_nodata: bool = True, + masked: bool = False, + mask_nodata: bool = _MASK_NODATA_DEPRECATED_SENTINEL, # type: ignore[assignment] + mask_and_scale: bool = False, + parse_coordinates: bool = True, + lock=None, + cache: bool = True, ) -> xr.DataArray: """Read a GeoTIFF, COG, or VRT file into an xarray.DataArray. @@ -537,9 +544,15 @@ def open_geotiff(source: str | BinaryIO, *, map. Per-parameter tier markers below describe the tier the parameter itself carries; a parameter's effective tier is bounded by the function-level surface above (e.g. ``[stable]`` - ``mask_nodata`` is still only stable when combined with a + ``masked`` is still only stable when combined with a ``[stable]`` source, codec, and options). + The read/masking parameters are named to match rioxarray's + ``open_rasterio`` (``masked``, ``default_name``, ``mask_and_scale``, + ``parse_coordinates``, ``lock``, ``cache``) so callers can move between + the two with minimal edits. ``masked`` defaults to ``False`` (no + sentinel-to-NaN promotion), matching ``open_rasterio``. + Automatically dispatches to the best backend: - ``gpu=True``: GPU-accelerated read via nvCOMP (returns CuPy) - ``chunks=N``: Dask lazy read via windowed chunks @@ -596,8 +609,13 @@ def open_geotiff(source: str | BinaryIO, *, release-gate parity. band : int or None [stable] Band index (0-based). None returns all bands. + default_name : str or None + [stable] Name for the DataArray. None derives it from the source + file name. Matches rioxarray's ``open_rasterio`` parameter. name : str or None - [stable] Name for the DataArray. + [deprecated] Deprecated alias of ``default_name``; emits a + ``DeprecationWarning``. Passing both ``default_name`` and ``name`` + raises ``TypeError``. chunks : int, tuple, or None [stable] Chunk size for Dask lazy reading. Dask reads are gated against the eager reader by the cross-backend parity @@ -664,17 +682,49 @@ def open_geotiff(source: str | BinaryIO, *, using band 0's sentinel for the whole mosaic. Passing this kwarg with a non-VRT source raises ``ValueError`` because the policy only applies to the VRT pipeline. - mask_nodata : bool, default True - [stable] If True (the default), replace the nodata sentinel - with ``NaN``; integer rasters get promoted to ``float64`` first - so NaN can be represented. If False, skip the sentinel-to-NaN + masked : bool, default False + [stable] If True, replace the nodata sentinel with ``NaN``; + integer rasters get promoted to ``float64`` first so NaN can be + represented. If False (the default), skip the sentinel-to-NaN step and keep the source dtype. ``attrs['nodata']`` still - carries the raw sentinel either way, so downstream code can - mask explicitly. Pass ``mask_nodata=False`` when you want to - preserve an integer source dtype via ``dtype=``: the default - ``mask_nodata=True`` promotes to ``float64`` whenever the - sentinel matches an actual pixel, and ``dtype=`` then - raises ``ValueError`` on the float-to-int cast. + carries the raw sentinel either way, so downstream code can mask + explicitly. The default matches rioxarray's ``open_rasterio`` + (``masked=False``); note that earlier xrspatial releases masked + by default (``mask_nodata=True``), so a bare ``open_geotiff(path)`` + no longer promotes the sentinel to NaN. Pass ``masked=True`` and + ``dtype=`` together and the read raises ``ValueError`` + once the sentinel matches a pixel, because the float64 promotion + then makes the integer cast lossy. + mask_nodata : bool + [deprecated] Deprecated alias of ``masked``; emits a + ``DeprecationWarning``. Passing both ``masked`` and ``mask_nodata`` + raises ``TypeError``. Note the default also changed from + ``mask_nodata=True`` to ``masked=False``. + mask_and_scale : bool, default False + [advanced] If True, read the source's GDAL ``SCALE`` / ``OFFSET`` + metadata and return ``data * scale + offset``, masking the nodata + sentinel to NaN as well (matching rioxarray's ``open_rasterio``). + The applied values are recorded on ``attrs['scale_factor']`` / + ``attrs['add_offset']``. A source without scale / offset metadata + is a no-op. A single scale / offset pair is applied to the whole + array; a source with differing per-band values is read with band + 0's. Supported on the CPU eager and dask paths; combining it with + ``gpu=True`` or a ``.vrt`` source raises ``ValueError``. + parse_coordinates : bool, default True + [stable] If True (the default), build ``x`` / ``y`` coordinate + arrays from the transform. If False, skip them and return a + DataArray with only dimensions (matching rioxarray's + ``open_rasterio``); ``attrs['transform']`` and ``attrs['crs']`` + still carry the georeferencing, and the ``band`` coord is kept. + Supported on the CPU eager and dask paths; combining + ``parse_coordinates=False`` with ``gpu=True`` or a ``.vrt`` source + raises ``ValueError``. + lock, cache + [advanced] Accepted for ``open_rasterio`` signature compatibility + but have no effect: xrspatial's reader re-opens the source per + window, so there is no shared GDAL handle to lock and no caching + layer to toggle. Passing a non-default value emits a + ``GeoTIFFFallbackWarning``. allow_rotated : bool, default False [advanced] Read-only opt-in. ``to_geotiff`` does not currently emit ``rotated_affine``; it rejects DataArrays that carry the @@ -777,16 +827,17 @@ def open_geotiff(source: str | BinaryIO, *, is what makes write -> read -> write -> read round-trips bit-stable for rasters with fractional pixel sizes or origins. - Integer rasters with a nodata sentinel are silently promoted to - ``float64`` with NaN replacing the sentinel so downstream NaN-aware - code works uniformly. To keep the source dtype on a file whose - sentinel matches actual pixels, pass ``mask_nodata=False``; the raw - sentinel stays in the data and ``attrs['nodata']`` still carries it. - Passing ``dtype=`` on its own is not enough: the - sentinel-to-NaN promotion runs first and the subsequent integer cast - then raises ``ValueError`` (float-to-int is lossy in a way users - rarely intend). When the file has no in-range sentinel match, the - promotion is skipped and ``dtype=`` works either way. + With ``masked=True``, integer rasters with a nodata sentinel are + promoted to ``float64`` with NaN replacing the sentinel so downstream + NaN-aware code works uniformly. The default ``masked=False`` keeps the + source dtype and leaves the raw sentinel in the data; + ``attrs['nodata']`` still carries it either way. With ``masked=True``, + passing ``dtype=`` as well is not enough to keep an integer + dtype: the sentinel-to-NaN promotion runs first and the subsequent + integer cast then raises ``ValueError`` (float-to-int is lossy in a + way users rarely intend). When the file has no in-range sentinel + match, the promotion is skipped and ``dtype=`` works either + way. Examples -------- @@ -816,6 +867,72 @@ def open_geotiff(source: str | BinaryIO, *, source = _coerce_path(source) + # Resolve the rioxarray-compatible renames. ``masked`` / ``default_name`` + # are the canonical names; ``mask_nodata`` / ``name`` are deprecated + # aliases kept for back-compat. Mirrors the sentinel-based deprecation in + # ``read_geotiff_gpu`` (gpu -> on_gpu_failure): passing both the old and + # new name is ambiguous and raises, passing the old name alone warns. + if mask_nodata is not _MASK_NODATA_DEPRECATED_SENTINEL: + if masked is not False: + raise TypeError( + "open_geotiff: pass either 'masked' or the deprecated " + "'mask_nodata' alias, not both.") + warnings.warn( + "open_geotiff(..., mask_nodata=...) is deprecated; use " + "masked=... instead. Note the default also changed from " + "mask_nodata=True to masked=False to match rioxarray's " + "open_rasterio: a bare open_geotiff(path) no longer promotes " + "the nodata sentinel to NaN.", + DeprecationWarning, stacklevel=2) + masked = mask_nodata + if name is not _NAME_DEPRECATED_SENTINEL: + if default_name is not None: + raise TypeError( + "open_geotiff: pass either 'default_name' or the deprecated " + "'name' alias, not both.") + warnings.warn( + "open_geotiff(..., name=...) is deprecated; use default_name=... " + "instead to match rioxarray's open_rasterio.", + DeprecationWarning, stacklevel=2) + default_name = name + + # ``lock`` / ``cache`` are accepted for open_rasterio signature + # compatibility. xrspatial's dask reader re-opens the source per window, + # so there is no shared GDAL handle to lock and no caching backend to + # toggle. Warn rather than silently ignore so a porting caller is not + # surprised by a no-op. + if lock is not None or cache is not True: + warnings.warn( + "open_geotiff: 'lock' and 'cache' are accepted for rioxarray " + "open_rasterio compatibility but have no effect; xrspatial's " + "reader re-opens the source per window, so there is no shared " + "GDAL handle to lock and no caching layer to toggle.", + GeoTIFFFallbackWarning, stacklevel=2) + + # ``mask_and_scale`` and ``parse_coordinates=False`` are implemented on + # the CPU eager and dask paths only. The GPU and VRT-mosaic paths build + # their DataArrays through separate code that is not covered by the + # cross-backend parity suite for these options, so refuse the + # combination up front rather than silently ignoring the kwarg -- the + # same per-backend rejection contract the dispatcher already applies to + # on_gpu_failure / missing_sources / max_cloud_bytes. + _is_vrt_source_early = ( + isinstance(source, str) and source.lower().endswith('.vrt')) + if mask_and_scale or not parse_coordinates: + offending = ( + 'mask_and_scale=True' if mask_and_scale + else 'parse_coordinates=False') + if gpu: + raise ValueError( + f"{offending} is not supported with gpu=True; it is " + "implemented on the CPU eager and dask paths. Drop gpu=True " + "or the kwarg.") + if _is_vrt_source_early: + raise ValueError( + f"{offending} is not supported for .vrt sources; it is " + "implemented on the CPU eager and dask paths over .tif " + "sources. Drop the kwarg.") + # All dispatcher-level kwarg rejection lives in # ``_validate_dispatch_kwargs`` so the three direct backends # (``read_geotiff_dask``, ``read_geotiff_gpu``, ``read_vrt``) @@ -932,7 +1049,7 @@ def open_geotiff(source: str | BinaryIO, *, if missing_sources_passed: vrt_kwargs['missing_sources'] = missing_sources return read_vrt(source, dtype=dtype, window=window, band=band, - name=name, chunks=chunks, gpu=gpu, + name=default_name, chunks=chunks, gpu=gpu, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, @@ -941,7 +1058,7 @@ def open_geotiff(source: str | BinaryIO, *, allow_experimental_codecs=allow_experimental_codecs, allow_internal_only_jpeg=allow_internal_only_jpeg, band_nodata=band_nodata, - mask_nodata=mask_nodata, + mask_nodata=masked, **vrt_kwargs) # File-like buffer rejections for ``gpu=True`` / ``chunks=...`` already @@ -957,7 +1074,7 @@ def open_geotiff(source: str | BinaryIO, *, return read_geotiff_gpu(source, dtype=dtype, overview_level=overview_level, window=window, band=band, - name=name, chunks=chunks, + name=default_name, chunks=chunks, max_pixels=max_pixels, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, @@ -967,7 +1084,7 @@ def open_geotiff(source: str | BinaryIO, *, allow_experimental_codecs), allow_internal_only_jpeg=( allow_internal_only_jpeg), - mask_nodata=mask_nodata, + mask_nodata=masked, **gpu_kwargs) # Dask path (CPU) @@ -975,7 +1092,7 @@ def open_geotiff(source: str | BinaryIO, *, return read_geotiff_dask(source, dtype=dtype, chunks=chunks, overview_level=overview_level, window=window, band=band, - max_pixels=max_pixels, name=name, + max_pixels=max_pixels, name=default_name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, allow_invalid_nodata=allow_invalid_nodata, @@ -984,7 +1101,9 @@ def open_geotiff(source: str | BinaryIO, *, allow_experimental_codecs), allow_internal_only_jpeg=( allow_internal_only_jpeg), - mask_nodata=mask_nodata) + mask_nodata=masked, + mask_and_scale=mask_and_scale, + parse_coordinates=parse_coordinates) kwargs = {} if max_pixels is not None: @@ -1007,11 +1126,11 @@ def open_geotiff(source: str | BinaryIO, *, **kwargs, ) - if name is None: + if default_name is None: # Derive from source path. File-like buffers don't have a path, # so leave name unset rather than fabricating one. if isinstance(source, str): - name = os.path.splitext(os.path.basename(source))[0] + default_name = os.path.splitext(os.path.basename(source))[0] # Hand the post-decode buffer to the shared eager finalizer. The # helper runs the same validate -> populate attrs -> mask -> cast @@ -1030,12 +1149,14 @@ def open_geotiff(source: str | BinaryIO, *, geo_info=geo_info, nodata=nodata, mask_sentinel=mask_sentinel, - mask_nodata=mask_nodata, + mask_nodata=masked, dtype=dtype, window=window, - name=name, + name=default_name, allow_rotated=allow_rotated, allow_unparseable_crs=allow_unparseable_crs, + mask_and_scale=mask_and_scale, + parse_coordinates=parse_coordinates, ) diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index de1185de0..d1120248e 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -1527,6 +1527,35 @@ def _apply_eager_nodata_mask(arr, *, mask_sentinel, mask_nodata): return arr, nodata_pixels_present +def _extract_scale_offset(gdal_metadata): + """Pull SCALE / OFFSET from parsed GDAL_METADATA for ``mask_and_scale``. + + Returns ``(scale, offset)`` floats, defaulting to ``(1.0, 0.0)`` when the + source carries no scale / offset. GDAL stores these in the GDAL_METADATA + XML either as dataset-level ``SCALE`` / ``OFFSET`` items or per-band items + keyed ``(name, band_index)`` by :func:`_parse_gdal_metadata`. Dataset-level + values are preferred; band 0's per-band values are the fallback. A single + pair is applied to the whole array, so a source with differing per-band + scale / offset is read with band 0's values (documented limitation). + """ + scale, offset = 1.0, 0.0 + if not gdal_metadata: + return scale, offset + + def _num(keys, default): + for k in keys: + if k in gdal_metadata: + try: + return float(gdal_metadata[k]) + except (TypeError, ValueError): + return default + return default + + scale = _num(['SCALE', ('SCALE', 0)], 1.0) + offset = _num(['OFFSET', ('OFFSET', 0)], 0.0) + return scale, offset + + def _finalize_eager_read( arr, *, @@ -1540,6 +1569,8 @@ def _finalize_eager_read( allow_rotated: bool = False, allow_unparseable_crs: bool = False, attrs_in: dict | None = None, + mask_and_scale: bool = False, + parse_coordinates: bool = True, ): """Validate, populate attrs, mask, cast, and build an eager DataArray. @@ -1593,15 +1624,33 @@ def _finalize_eager_read( attrs: dict = dict(attrs_in) if attrs_in else {} _populate_attrs_from_geo_info(attrs, geo_info, window=window) + # ``mask_and_scale`` implies masking (rioxarray applies scale / offset + # AND masks the nodata sentinel to NaN), so fold it into the mask gate. + effective_mask = mask_nodata or mask_and_scale + # Apply the nodata-to-NaN mask (or compute pixels_present - # without rewriting if ``mask_nodata=False``). Skipped entirely when + # without rewriting if masking is off). Skipped entirely when # the source declared no sentinel. nodata_pixels_present: bool | None = None if nodata is not None: arr, nodata_pixels_present = _apply_eager_nodata_mask( - arr, mask_sentinel=mask_sentinel, mask_nodata=mask_nodata, + arr, mask_sentinel=mask_sentinel, mask_nodata=effective_mask, ) + # ``mask_and_scale``: apply ``data * scale + offset`` from the source's + # GDAL_METADATA. Runs before the caller's ``dtype=`` cast so a + # ``dtype=`` request raises the same float-to-int ValueError the + # mask path raises (scaling promotes to float). + if mask_and_scale: + scale, offset = _extract_scale_offset( + getattr(geo_info, 'gdal_metadata', None)) + if scale != 1.0 or offset != 0.0: + if arr.dtype.kind != 'f': + arr = arr.astype(np.float64) + arr = arr * scale + offset + attrs['scale_factor'] = scale + attrs['add_offset'] = offset + # Caller-requested dtype cast (post-mask so the integer # promotion above runs first). ``_validate_dtype_cast`` lives in # ``_validation``; local import keeps ``_attrs`` free of a top-level @@ -1622,16 +1671,20 @@ def _finalize_eager_read( # masked" rather than "masking was disabled"). _set_nodata_attrs( attrs, nodata, - masked=(mask_nodata and np.dtype(str(arr.dtype)).kind == 'f'), + masked=(effective_mask and np.dtype(str(arr.dtype)).kind == 'f'), pixels_present=nodata_pixels_present, dtype_cast=dtype_cast_attr, ) # Build the DataArray. ``_coords_from_geo_info`` honours the # windowed-read contract (origin shifted to the window's top-left). + # ``parse_coordinates=False`` skips the x / y coordinate arrays + # (matching rioxarray); the transform / crs attrs still carry the + # georeferencing, and the band coord is kept. height, width = arr.shape[:2] - coords = _coords_from_geo_info( - geo_info, height, width, window=window, + coords = ( + _coords_from_geo_info(geo_info, height, width, window=window) + if parse_coordinates else {} ) if arr.ndim == 3: dims = ['y', 'x', 'band'] diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index 3de4d47a4..5bda7f4fd 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -40,7 +40,9 @@ def read_geotiff_dask(source: str, *, allow_experimental_codecs: bool = False, allow_internal_only_jpeg: bool = False, band_nodata: str | None = None, - mask_nodata: bool = True) -> xr.DataArray: + mask_nodata: bool = True, + mask_and_scale: bool = False, + parse_coordinates: bool = True) -> xr.DataArray: """Read a GeoTIFF as a dask-backed DataArray for out-of-core processing. Release-contract tier (see @@ -105,6 +107,15 @@ def read_geotiff_dask(source: str, *, either way. Pass ``mask_nodata=False`` together with ``dtype=`` to keep an integer source dtype; the default promotes to ``float64`` and the cast then raises. + mask_and_scale : bool, default False + [advanced] If True, apply the source's GDAL ``SCALE`` / ``OFFSET`` + (``data * scale + offset``) lazily on the assembled dask array and + mask the nodata sentinel. Records ``attrs['scale_factor']`` / + ``attrs['add_offset']``. No-op when the source carries no scale / + offset metadata. + parse_coordinates : bool, default True + [stable] If False, skip the ``x`` / ``y`` coordinate arrays; the + ``transform`` / ``crs`` attrs still carry the georeferencing. allow_rotated : bool, default False [advanced] Read-side opt-in for rotated / sheared ``ModelTransformationTag`` files. Forwarded to every per-chunk @@ -418,6 +429,10 @@ def read_geotiff_dask(source: str, *, and file_dtype.kind in ('u', 'i') and lifecycle.sentinel_fits_buffer): effective_dtype = np.dtype('float64') + # ``mask_and_scale`` applies ``data * scale + offset`` (and masks), which + # promotes any integer source to float regardless of the sentinel. + if mask_and_scale and file_dtype.kind != 'f': + effective_dtype = np.dtype('float64') if dtype is not None: target_dtype = np.dtype(dtype) @@ -505,7 +520,7 @@ def read_geotiff_dask(source: str, *, attrs = _finalize_lazy_read_attrs( geo_info=geo_info, nodata=nodata_attr, - mask_nodata=mask_nodata, + mask_nodata=(mask_nodata or mask_and_scale), graph_dtype=target_dtype, caller_dtype=dtype, window=window, @@ -580,7 +595,7 @@ def read_geotiff_dask(source: str, *, # int-promotion branches in ``_delayed_read_window``. The # original sentinel is still carried in ``attrs['nodata']`` via # ``nodata_attr`` so write round-trips preserve the tag. - chunk_nodata = nodata if mask_nodata else None + chunk_nodata = nodata if (mask_nodata or mask_and_scale) else None # ``effective_source`` swaps in the sidecar URL when the # requested overview lives in an external ``.tif.ovr``. # For local files and non-sidecar remote @@ -613,6 +628,24 @@ def read_geotiff_dask(source: str, *, dask_arr = da.concatenate(dask_rows, axis=0) + # ``mask_and_scale``: apply ``data * scale + offset`` lazily on the + # assembled dask array. The per-chunk mask above already promoted the + # graph to float and replaced sentinels with NaN. + if mask_and_scale: + from .._attrs import _extract_scale_offset + scale, offset = _extract_scale_offset( + getattr(geo_info, 'gdal_metadata', None)) + if scale != 1.0 or offset != 0.0: + dask_arr = dask_arr * scale + offset + attrs['scale_factor'] = scale + attrs['add_offset'] = offset + + # ``parse_coordinates=False`` drops the x / y coordinate arrays (the + # transform / crs attrs still carry georeferencing); the band coord is + # kept. + if not parse_coordinates: + coords = {} + if out_has_band_axis: dims = ['y', 'x', 'band'] coords['band'] = np.arange(n_bands) diff --git a/xrspatial/geotiff/_runtime.py b/xrspatial/geotiff/_runtime.py index e9dfd58e1..f783ec05d 100644 --- a/xrspatial/geotiff/_runtime.py +++ b/xrspatial/geotiff/_runtime.py @@ -48,6 +48,14 @@ # ``path is None`` branch and raised a "missing required argument" # TypeError for the wrong reason. _VRT_PATH_MISSING_SENTINEL = object() +# ``open_geotiff`` renamed ``mask_nodata`` -> ``masked`` (and flipped the +# default from True to False) and ``name`` -> ``default_name`` to match +# rioxarray's ``open_rasterio``. Each sentinel distinguishes "caller passed +# the deprecated alias" from "caller passed nothing", so passing both the old +# and new name raises TypeError and the old name alone warns. Same rationale +# as ``_GPU_DEPRECATED_SENTINEL`` above. +_MASK_NODATA_DEPRECATED_SENTINEL = object() +_NAME_DEPRECATED_SENTINEL = object() # Spatial dim names recognised on 3D writer inputs. ``y``/``x`` are the From 5e5354f301be9dddbd26cc4496baf83d5a48d98c Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 5 Jun 2026 07:26:30 -0700 Subject: [PATCH 2/6] Migrate read_nodata + signature tests for masked default flip (#2961) --- xrspatial/geotiff/tests/read/test_nodata.py | 86 +++++++++---------- .../geotiff/tests/unit/test_signatures.py | 19 +++- 2 files changed, 59 insertions(+), 46 deletions(-) diff --git a/xrspatial/geotiff/tests/read/test_nodata.py b/xrspatial/geotiff/tests/read/test_nodata.py index c6592d125..49cee0955 100644 --- a/xrspatial/geotiff/tests/read/test_nodata.py +++ b/xrspatial/geotiff/tests/read/test_nodata.py @@ -399,7 +399,7 @@ def test_eager_mask_nodata_false_reports_false(tmp_path): path = str(tmp_path / "tmp_2092_eager_unmasked.tif") _make_float_raster_with_nodata_2092(path) - out = open_geotiff(path, mask_nodata=False) + out = open_geotiff(path, masked=False) assert out.attrs.get('nodata') == -9999.0 assert out.attrs.get('masked_nodata') is False, ( f"buffer holds literal -9999 pixels but attrs say " @@ -415,7 +415,7 @@ def test_eager_mask_nodata_true_reports_true(tmp_path): path = str(tmp_path / "tmp_2092_eager_masked.tif") _make_float_raster_with_nodata_2092(path) - out = open_geotiff(path) # mask_nodata defaults to True + out = open_geotiff(path, masked=True) assert out.attrs.get('nodata') == -9999.0 assert out.attrs.get('masked_nodata') is True # -9999 replaced with NaN. @@ -440,7 +440,7 @@ def test_eager_int_file_mask_nodata_true_no_match_reports_false(tmp_path): path = str(tmp_path / "tmp_2092_int_oor_sentinel.tif") to_geotiff(da, path) - out = open_geotiff(path) + out = open_geotiff(path, masked=True) assert out.attrs.get('nodata') == -9999 # No pixel matched, no cast, buffer stays uint16. assert out.dtype.kind == 'u' @@ -463,7 +463,7 @@ def test_eager_explicit_float_dtype_mask_off_reports_false(tmp_path): path = str(tmp_path / "tmp_2092_eager_int_to_float_unmasked.tif") to_geotiff(da, path) - out = open_geotiff(path, mask_nodata=False, dtype=np.float64) + out = open_geotiff(path, masked=False, dtype=np.float64) assert out.dtype == np.float64 assert out.attrs.get('masked_nodata') is False # The literal 30 is still in the float buffer (cast, not masked). @@ -559,7 +559,7 @@ def test_vrt_int_source_mask_nodata_false_reports_false(tmp_path): "tmp_2092_vrt_src.tif", "tmp_2092_vrt_unmasked.vrt", ) - out = open_geotiff(vrt, mask_nodata=False) + out = open_geotiff(vrt, masked=False) assert out.dtype.kind == 'i', f"expected int dtype, got {out.dtype}" assert out.attrs.get('masked_nodata') is False # The literal sentinel is still in the buffer. @@ -576,7 +576,7 @@ def test_vrt_int_source_mask_nodata_true_reports_true(tmp_path): "tmp_2092_vrt_src2.tif", "tmp_2092_vrt_masked.vrt", ) - out = open_geotiff(vrt) # mask_nodata defaults to True + out = open_geotiff(vrt, masked=True) assert out.dtype == np.float64, ( f"expected float64 promotion, got {out.dtype}") assert out.attrs.get('masked_nodata') is True @@ -597,7 +597,7 @@ def test_vrt_int_source_mask_off_with_float_cast_reports_false(tmp_path): "tmp_2092_vrt_src_cast.tif", "tmp_2092_vrt_unmasked_cast.vrt", ) - out = open_geotiff(vrt, mask_nodata=False, dtype=np.float64) + out = open_geotiff(vrt, masked=False, dtype=np.float64) assert out.dtype == np.float64 assert out.attrs.get('masked_nodata') is False # The literal 30 is still in the float buffer (cast, not masked). @@ -672,7 +672,7 @@ def test_eager_float_sentinel_present_masked(tmp_path): nodata_pixels_present=True, nodata_dtype_cast absent.""" path = str(tmp_path / "tmp_2135_eager_float_present.tif") _make_float_raster_2135(path) - out = open_geotiff(path) + out = open_geotiff(path, masked=True) assert out.attrs.get('masked_nodata') is True assert out.attrs.get('nodata_pixels_present') is True assert 'nodata_dtype_cast' not in out.attrs @@ -683,7 +683,7 @@ def test_eager_float_sentinel_absent_masked(tmp_path): nodata_pixels_present=False.""" path = str(tmp_path / "tmp_2135_eager_float_absent.tif") _make_float_raster_2135(path, plant_sentinel=False) - out = open_geotiff(path) + out = open_geotiff(path, masked=True) assert out.attrs.get('masked_nodata') is True assert out.attrs.get('nodata_pixels_present') is False @@ -693,7 +693,7 @@ def test_eager_float_sentinel_present_unmasked(tmp_path): masking branch skipped but presence scan still runs.""" path = str(tmp_path / "tmp_2135_eager_float_present_unmasked.tif") _make_float_raster_2135(path) - out = open_geotiff(path, mask_nodata=False) + out = open_geotiff(path, masked=False) assert out.attrs.get('masked_nodata') is False assert out.attrs.get('nodata_pixels_present') is True @@ -703,7 +703,7 @@ def test_eager_int_sentinel_present(tmp_path): promotion fires, nodata_pixels_present=True.""" path = str(tmp_path / "tmp_2135_eager_int_present.tif") _make_int_raster_2135(path) - out = open_geotiff(path) + out = open_geotiff(path, masked=True) assert out.dtype == np.float64 assert out.attrs.get('masked_nodata') is True assert out.attrs.get('nodata_pixels_present') is True @@ -720,7 +720,7 @@ def test_eager_int_out_of_range_sentinel(tmp_path): ) path = str(tmp_path / "tmp_2135_eager_int_oor.tif") to_geotiff(da, path) - out = open_geotiff(path) + out = open_geotiff(path, masked=True) assert out.attrs.get('nodata') == -9999 assert out.attrs.get('masked_nodata') is False assert out.attrs.get('nodata_pixels_present') is False @@ -732,7 +732,7 @@ def test_eager_int_sentinel_present_unmasked(tmp_path): nodata_pixels_present=True from the no-mask scan branch.""" path = str(tmp_path / "tmp_2135_eager_int_present_unmasked.tif") _make_int_raster_2135(path) - out = open_geotiff(path, mask_nodata=False) + out = open_geotiff(path, masked=False) assert out.dtype.kind == 'i' assert out.attrs.get('masked_nodata') is False assert out.attrs.get('nodata_pixels_present') is True @@ -743,7 +743,7 @@ def test_eager_dtype_cast_records_target(tmp_path): """``dtype=`` kwarg surfaces as nodata_dtype_cast.""" path = str(tmp_path / "tmp_2135_eager_dtype_cast.tif") _make_int_raster_2135(path) - out = open_geotiff(path, mask_nodata=False, dtype=np.float64) + out = open_geotiff(path, masked=False, dtype=np.float64) assert out.dtype == np.float64 assert out.attrs.get('masked_nodata') is False assert out.attrs.get('nodata_dtype_cast') == 'float64' @@ -841,7 +841,7 @@ def test_vrt_int_sentinel_present_masked(tmp_path): vrt = _write_int_vrt_2135( tmp_path, "tmp_2135_vrt_src.tif", "tmp_2135_vrt_present.vrt", ) - out = open_geotiff(vrt) + out = open_geotiff(vrt, masked=True) assert out.dtype == np.float64 assert out.attrs.get('masked_nodata') is True assert out.attrs.get('nodata_pixels_present') is True @@ -855,7 +855,7 @@ def test_vrt_int_sentinel_absent_masked(tmp_path): "tmp_2135_vrt_absent.vrt", plant_sentinel=False, ) - out = open_geotiff(vrt) + out = open_geotiff(vrt, masked=True) assert out.dtype.kind == 'i' # no promotion assert out.attrs.get('masked_nodata') is False assert out.attrs.get('nodata_pixels_present') is False @@ -867,7 +867,7 @@ def test_vrt_int_unmasked_still_scans(tmp_path): tmp_path, "tmp_2135_vrt_src_unmasked.tif", "tmp_2135_vrt_unmasked.vrt", ) - out = open_geotiff(vrt, mask_nodata=False) + out = open_geotiff(vrt, masked=False) assert out.dtype.kind == 'i' assert out.attrs.get('masked_nodata') is False assert out.attrs.get('nodata_pixels_present') is True @@ -879,7 +879,7 @@ def test_vrt_dtype_cast_records_target(tmp_path): tmp_path, "tmp_2135_vrt_src_cast.tif", "tmp_2135_vrt_cast.vrt", ) - out = open_geotiff(vrt, mask_nodata=False, dtype=np.float64) + out = open_geotiff(vrt, masked=False, dtype=np.float64) assert out.dtype == np.float64 assert out.attrs.get('masked_nodata') is False assert out.attrs.get('nodata_dtype_cast') == 'float64' @@ -1037,7 +1037,7 @@ def test_open_geotiff_eager_int_nodata_finite_still_masks(tmp_path): """Regression guard: in-range finite sentinel still masks correctly.""" # 30 is one of the pixel values; using it as a sentinel masks one pixel. path = _build_uint16_tiff_1774('30', tmp_path) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) # uint16 + in-range sentinel hit promotes to float64 with NaN assert da.dtype == np.float64 assert np.isnan(da.values[1, 0]) @@ -1204,7 +1204,7 @@ def test_float_sentinel_strip_tiff_read(tmp_path): to_geotiff(da, path, nodata=-9999.0, tiled=False, compression='deflate') - out = open_geotiff(path) + out = open_geotiff(path, masked=True) expected_mask = (src == np.float32(-9999.0)) np.testing.assert_array_equal(np.isnan(out.data), expected_mask) finite = ~expected_mask @@ -1221,7 +1221,7 @@ def test_float_sentinel_tiled_tiff_read(tmp_path): to_geotiff(da, path, nodata=-9999.0, tiled=True, tile_size=16, compression='deflate') - out = open_geotiff(path) + out = open_geotiff(path, masked=True) expected_mask = (src == np.float32(-9999.0)) np.testing.assert_array_equal(np.isnan(out.data), expected_mask) finite = ~expected_mask @@ -1237,7 +1237,7 @@ def test_uint16_sentinel_tiled_tiff_read(tmp_path): to_geotiff(da, path, nodata=65535, tiled=True, tile_size=16, compression='deflate') - out = open_geotiff(path) + out = open_geotiff(path, masked=True) assert out.dtype.kind == 'f' expected_mask = (src == 65535) np.testing.assert_array_equal(np.isnan(out.data), expected_mask) @@ -1261,7 +1261,7 @@ def test_repeat_reads_independent(tmp_path): attrs={'crs': 4326, 'nodata': -9999.0}) to_geotiff(da, path, nodata=-9999.0, compression='deflate') - first = open_geotiff(path) + first = open_geotiff(path, masked=True) expected_mask = (src == np.float32(-9999.0)) np.testing.assert_array_equal(np.isnan(first.data), expected_mask) @@ -1270,7 +1270,7 @@ def test_repeat_reads_independent(tmp_path): first.data[1, 1] = np.nan first.data[2, 2] = 12345.0 - second = open_geotiff(path) + second = open_geotiff(path, masked=True) np.testing.assert_array_equal(np.isnan(second.data), expected_mask) finite = ~expected_mask np.testing.assert_allclose(second.data[finite], src[finite]) @@ -1290,7 +1290,7 @@ def test_dask_chunked_float_sentinel_read(tmp_path): to_geotiff(da, path, nodata=-9999.0, tiled=True, tile_size=16, compression='deflate') - out = open_geotiff(path, chunks=16) + out = open_geotiff(path, chunks=16, masked=True) materialised = out.compute().data expected_mask = (src == np.float32(-9999.0)) np.testing.assert_array_equal(np.isnan(materialised), expected_mask) @@ -1307,7 +1307,7 @@ def test_dask_chunked_uint16_sentinel_read(tmp_path): to_geotiff(da, path, nodata=65535, tiled=True, tile_size=16, compression='deflate') - out = open_geotiff(path, chunks=16) + out = open_geotiff(path, chunks=16, masked=True) materialised = out.compute().data assert materialised.dtype.kind == 'f' expected_mask = (src == 65535) @@ -1903,7 +1903,7 @@ def oor_tif(tmp_path): class TestIntegerSentinelParity: def test_eager_masks_int_sentinel_to_nan(self, int_tif): - da = open_geotiff(int_tif) + da = open_geotiff(int_tif, masked=True) # int sentinel auto-promotes to float when at least one pixel # matches, leaving NaN where the sentinel was. assert da.dtype.kind == "f" @@ -1912,7 +1912,7 @@ def test_eager_masks_int_sentinel_to_nan(self, int_tif): assert da.attrs["masked_nodata"] is True def test_dask_matches_eager(self, int_tif): - eager = open_geotiff(int_tif) + eager = open_geotiff(int_tif, masked=True) lazy = read_geotiff_dask(int_tif, chunks=2) # Same on-disk sentinel propagated. assert lazy.attrs["nodata"] == eager.attrs["nodata"] @@ -1926,7 +1926,7 @@ def test_dask_matches_eager(self, int_tif): def test_gpu_matches_eager(self, int_tif): from xrspatial.geotiff import read_geotiff_gpu - eager = open_geotiff(int_tif) + eager = open_geotiff(int_tif, masked=True) gpu = read_geotiff_gpu(int_tif) assert gpu.attrs["nodata"] == eager.attrs["nodata"] np.testing.assert_array_equal( @@ -1936,14 +1936,14 @@ def test_gpu_matches_eager(self, int_tif): class TestFloatSentinelParity: def test_eager(self, float_tif): - da = open_geotiff(float_tif) + da = open_geotiff(float_tif, masked=True) assert da.dtype == np.float32 assert np.isnan(da.data[0, 2]) assert da.attrs["nodata"] == -9999.0 assert da.attrs["masked_nodata"] is True def test_dask(self, float_tif): - eager = open_geotiff(float_tif) + eager = open_geotiff(float_tif, masked=True) lazy = read_geotiff_dask(float_tif, chunks=2) np.testing.assert_array_equal( np.isnan(eager.data), np.isnan(lazy.compute().data), @@ -1954,7 +1954,7 @@ def test_dask(self, float_tif): def test_gpu(self, float_tif): from xrspatial.geotiff import read_geotiff_gpu - eager = open_geotiff(float_tif) + eager = open_geotiff(float_tif, masked=True) gpu = read_geotiff_gpu(float_tif) np.testing.assert_array_equal( np.isnan(eager.data), np.isnan(gpu.data.get()), @@ -2026,7 +2026,7 @@ def _build(self, path, sentinel=10): def test_eager_masks_inverted_sentinel(self, tmp_path): path = str(tmp_path / "miw_2226.tif") self._build(path) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) # The MinIsWhite writer pre-inverts both pixels AND the sentinel # (see ``_writer._invert_nodata_for_miniswhite``), so the on-disk # GDAL_NODATA tag stores 245 = 255 - 10. The reader's MinIsWhite @@ -2042,7 +2042,7 @@ def test_eager_masks_inverted_sentinel(self, tmp_path): def test_dask_matches_eager(self, tmp_path): path = str(tmp_path / "miw_dask_2226.tif") self._build(path) - eager = open_geotiff(path) + eager = open_geotiff(path, masked=True) lazy = read_geotiff_dask(path, chunks=2) np.testing.assert_array_equal( np.isnan(eager.data), np.isnan(lazy.compute().data), @@ -2083,7 +2083,7 @@ class _IFDStub: class TestMaskNodataFalseParity: def test_eager_keeps_literal_sentinel(self, int_tif): - da = open_geotiff(int_tif, mask_nodata=False) + da = open_geotiff(int_tif, masked=False) # Buffer keeps integer dtype + literal sentinel pixel (255). assert da.dtype == np.uint8 assert int(da.data[0, 2]) == 255 @@ -2193,7 +2193,7 @@ def test_eager_restores_nan_to_sentinel(self, tmp_path): to_geotiff(da, path, nodata=-9999.0) # Read back with mask_nodata=False so we can see the literal # on-disk byte value the restore step planted. - readback = open_geotiff(path, mask_nodata=False) + readback = open_geotiff(path, masked=False) assert readback.data[0, 1] == -9999.0 def test_masked_nodata_false_attr_blocks_restore(self, tmp_path): @@ -2215,7 +2215,7 @@ def test_masked_nodata_false_attr_blocks_restore(self, tmp_path): attrs={"nodata": -9999.0, "masked_nodata": False}, ) to_geotiff(da, path) - readback = open_geotiff(path, mask_nodata=False) + readback = open_geotiff(path, masked=False) # Restore step skipped, so the NaN survives as on-disk NaN. assert np.isnan(readback.data[0, 1]) @@ -2236,7 +2236,7 @@ def test_gpu_writer_matches_eager(self, tmp_path): dims=("y", "x"), ) to_geotiff(da, path, nodata=-9999.0, gpu=True) - readback = open_geotiff(path, mask_nodata=False) + readback = open_geotiff(path, masked=False) assert readback.data[0, 1] == -9999.0 @@ -2422,7 +2422,7 @@ def test_float_source_with_sentinel(self, tmp_path): """Float source + declared sentinel -> nodata set, masked_nodata=True.""" path = str(tmp_path / "tnss1988_float_sentinel.tif") _write_float_tiff_1988(path, with_sentinel=True) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert da.attrs["nodata"] == _SENTINEL_1988 assert da.attrs["masked_nodata"] is True # The literal sentinel must have been replaced with NaN. @@ -2442,7 +2442,7 @@ def test_int_source_with_sentinel_hit(self, tmp_path): """Int source + sentinel hit -> nodata set, masked_nodata=True (promoted).""" path = str(tmp_path / "tnss1988_int_hit.tif") _write_int_tiff_1988(path, with_sentinel_hit=True) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert da.attrs["nodata"] == 65535 # Eager numpy promotes integer to float64 on the first hit. assert da.dtype.kind == "f" @@ -2460,7 +2460,7 @@ def test_int_source_no_hit_keeps_sentinel(self, tmp_path): """ path = str(tmp_path / "tnss1988_int_no_hit.tif") _write_int_tiff_1988(path, with_sentinel_hit=False) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert da.attrs["nodata"] == 65535 assert da.dtype.kind in ("u", "i") assert da.attrs["masked_nodata"] is False @@ -2878,7 +2878,7 @@ def test_round_trip_preserves_masked_nodata_true(self, tmp_path): src = tmp_path / "test_1988_round_trip_src.tif" _write_float_tiff_1988(str(src), with_sentinel=True) - da = open_geotiff(str(src)) + da = open_geotiff(str(src), masked=True) assert da.attrs["masked_nodata"] is True # The reader promoted the sentinel value to NaN. arr_in = np.asarray(da.data) @@ -3300,7 +3300,7 @@ def test_open_geotiff_int_finite_nodata_unaffected(tmp_path): validator must only reject non-finite / fractional sentinels. """ path = _build_uint16_tiff('30', tmp_path) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) # 30 matches a real pixel; the sentinel-to-NaN promotion fires. assert da.dtype == np.float64 assert np.isnan(da.values[1, 0]) diff --git a/xrspatial/geotiff/tests/unit/test_signatures.py b/xrspatial/geotiff/tests/unit/test_signatures.py index 7114039a9..12dc08ffd 100644 --- a/xrspatial/geotiff/tests/unit/test_signatures.py +++ b/xrspatial/geotiff/tests/unit/test_signatures.py @@ -403,6 +403,9 @@ def test_write_geotiff_gpu_streaming_buffer_bytes_runtime_noop(tmp_path): "bbox", "overview_level", "band", + # rioxarray ``open_rasterio`` name for the DataArray. Sits immediately + # before its deprecated ``name`` alias. + "default_name", "name", "chunks", "gpu", @@ -428,7 +431,17 @@ def test_write_geotiff_gpu_streaming_buffer_bytes_runtime_noop(tmp_path): "allow_experimental_codecs", "allow_internal_only_jpeg", "band_nodata", + # rioxarray ``open_rasterio`` masking flag (default False). Sits + # immediately before its deprecated ``mask_nodata`` alias. + "masked", "mask_nodata", + # rioxarray-compatible read options. ``mask_and_scale`` / + # ``parse_coordinates`` are also threaded into ``read_geotiff_dask``; + # ``lock`` / ``cache`` are open_geotiff-only accept-and-warn shims. + "mask_and_scale", + "parse_coordinates", + "lock", + "cache", ) @@ -2095,7 +2108,7 @@ def test_read_geotiff_gpu_chunks_max_pixels_rejects_oversized(small_tiff_path): def test_open_geotiff_chunks_name_flows_through(small_tiff_path): path, arr = small_tiff_path - da = open_geotiff(path, chunks=4, name="dispatch_dask") + da = open_geotiff(path, chunks=4, default_name="dispatch_dask") assert da.name == "dispatch_dask" np.testing.assert_array_equal(da.values, arr) @@ -2103,7 +2116,7 @@ def test_open_geotiff_chunks_name_flows_through(small_tiff_path): @requires_gpu def test_open_geotiff_gpu_name_flows_through(small_tiff_path): path, arr = small_tiff_path - da = open_geotiff(path, gpu=True, name="dispatch_gpu") + da = open_geotiff(path, gpu=True, default_name="dispatch_gpu") assert da.name == "dispatch_gpu" np.testing.assert_array_equal(da.data.get(), arr) @@ -2111,7 +2124,7 @@ def test_open_geotiff_gpu_name_flows_through(small_tiff_path): @requires_gpu def test_open_geotiff_gpu_chunks_name_flows_through(small_tiff_path): path, arr = small_tiff_path - da = open_geotiff(path, gpu=True, chunks=4, name="dispatch_dask_gpu") + da = open_geotiff(path, gpu=True, chunks=4, default_name="dispatch_dask_gpu") assert da.name == "dispatch_dask_gpu" np.testing.assert_array_equal(da.data.compute().get(), arr) From f657146531b9277151936921274959fbd94ecf9f Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 5 Jun 2026 07:37:12 -0700 Subject: [PATCH 3/6] Migrate remaining geotiff tests for masked default flip (#2961) --- xrspatial/geotiff/__init__.py | 15 +++-- .../tests/gpu/test_kernels_and_kwargs.py | 24 ++++---- xrspatial/geotiff/tests/gpu/test_reader.py | 20 +++---- xrspatial/geotiff/tests/gpu/test_writer.py | 6 +- .../tests/integration/test_dask_pipeline.py | 16 ++--- .../tests/parity/test_backend_matrix.py | 13 ++-- .../geotiff/tests/parity/test_finalization.py | 4 +- .../geotiff/tests/parity/test_reference.py | 8 +-- xrspatial/geotiff/tests/read/test_basic.py | 2 +- xrspatial/geotiff/tests/read/test_dtypes.py | 4 +- .../tests/release_gates/test_features.py | 8 +-- .../release_gates/test_stable_features.py | 15 ++++- xrspatial/geotiff/tests/test_edge_cases.py | 4 +- xrspatial/geotiff/tests/test_round_trip.py | 6 +- xrspatial/geotiff/tests/unit/test_ifd.py | 6 +- xrspatial/geotiff/tests/unit/test_metadata.py | 4 +- .../geotiff/tests/unit/test_photometric.py | 20 +++---- xrspatial/geotiff/tests/vrt/test_metadata.py | 16 ++--- xrspatial/geotiff/tests/write/test_basic.py | 4 +- xrspatial/geotiff/tests/write/test_nodata.py | 59 +++++++++++++------ .../geotiff/tests/write/test_overview.py | 14 ++--- .../geotiff/tests/write/test_streaming.py | 2 +- 22 files changed, 154 insertions(+), 116 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index caa418812..4c8549baa 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -512,7 +512,7 @@ def open_geotiff(source: str | BinaryIO, *, mask_nodata: bool = _MASK_NODATA_DEPRECATED_SENTINEL, # type: ignore[assignment] mask_and_scale: bool = False, parse_coordinates: bool = True, - lock=None, + lock: object | None = None, cache: bool = True, ) -> xr.DataArray: """Read a GeoTIFF, COG, or VRT file into an xarray.DataArray. @@ -719,12 +719,15 @@ def open_geotiff(source: str | BinaryIO, *, Supported on the CPU eager and dask paths; combining ``parse_coordinates=False`` with ``gpu=True`` or a ``.vrt`` source raises ``ValueError``. - lock, cache + lock : object or None [advanced] Accepted for ``open_rasterio`` signature compatibility - but have no effect: xrspatial's reader re-opens the source per - window, so there is no shared GDAL handle to lock and no caching - layer to toggle. Passing a non-default value emits a - ``GeoTIFFFallbackWarning``. + but has no effect: xrspatial's reader re-opens the source per + window, so there is no shared GDAL handle to lock. Passing a + non-default value emits a ``GeoTIFFFallbackWarning``. + cache : bool + [advanced] Accepted for ``open_rasterio`` signature compatibility + but has no effect: xrspatial has no caching backend to toggle. + Passing a non-default value emits a ``GeoTIFFFallbackWarning``. allow_rotated : bool, default False [advanced] Read-only opt-in. ``to_geotiff`` does not currently emit ``rotated_affine``; it rejects DataArrays that carry the diff --git a/xrspatial/geotiff/tests/gpu/test_kernels_and_kwargs.py b/xrspatial/geotiff/tests/gpu/test_kernels_and_kwargs.py index ba2e51625..c3a761d50 100644 --- a/xrspatial/geotiff/tests/gpu/test_kernels_and_kwargs.py +++ b/xrspatial/geotiff/tests/gpu/test_kernels_and_kwargs.py @@ -2483,7 +2483,7 @@ def test_open_geotiff_gpu_mask_nodata_false_threads_through_2052( from xrspatial.geotiff import open_geotiff path, arr = uint16_with_matching_sentinel_2052 - da = open_geotiff(path, gpu=True, mask_nodata=False) + da = open_geotiff(path, gpu=True, masked=False) assert da.dtype == np.uint16 np.testing.assert_array_equal(da.data.get(), arr) @@ -2544,7 +2544,7 @@ def test_open_geotiff_dask_gpu_mask_nodata_false_threads_through_2052( from xrspatial.geotiff import open_geotiff path, arr = uint16_with_matching_sentinel_2052 - da = open_geotiff(path, gpu=True, chunks=2, mask_nodata=False) + da = open_geotiff(path, gpu=True, chunks=2, masked=False) assert da.dtype == np.uint16 computed = da.compute() @@ -2584,7 +2584,7 @@ def test_open_geotiff_vrt_mask_nodata_false_threads_through_2052( from xrspatial.geotiff import open_geotiff vrt_path, arr = uint16_vrt_with_matching_sentinel_2052 - da = open_geotiff(vrt_path, mask_nodata=False) + da = open_geotiff(vrt_path, masked=False) assert da.dtype == np.uint16 np.testing.assert_array_equal(np.asarray(da.values), arr) @@ -2637,7 +2637,7 @@ def test_open_geotiff_vrt_chunked_mask_nodata_false_threads_through_2052( from xrspatial.geotiff import open_geotiff vrt_path, arr = uint16_vrt_with_matching_sentinel_2052 - da = open_geotiff(vrt_path, chunks=2, mask_nodata=False) + da = open_geotiff(vrt_path, chunks=2, masked=False) assert da.dtype == np.uint16 computed = da.compute() @@ -2651,8 +2651,8 @@ def test_cross_backend_parity_eager_dask_numpy_2052( path, arr = uint16_with_matching_sentinel_2052 - eager = open_geotiff(path, mask_nodata=False) - dask_ = open_geotiff(path, chunks=2, mask_nodata=False).compute() + eager = open_geotiff(path, masked=False) + dask_ = open_geotiff(path, chunks=2, masked=False).compute() assert eager.dtype == np.uint16 assert dask_.dtype == np.uint16 @@ -2668,8 +2668,8 @@ def test_cross_backend_parity_eager_gpu_2052( path, arr = uint16_with_matching_sentinel_2052 - eager = open_geotiff(path, mask_nodata=False) - gpu = open_geotiff(path, gpu=True, mask_nodata=False) + eager = open_geotiff(path, masked=False) + gpu = open_geotiff(path, gpu=True, masked=False) assert eager.dtype == np.uint16 assert gpu.dtype == np.uint16 @@ -2685,9 +2685,9 @@ def test_cross_backend_parity_eager_dask_gpu_2052( path, arr = uint16_with_matching_sentinel_2052 - eager = open_geotiff(path, mask_nodata=False) + eager = open_geotiff(path, masked=False) dgpu = open_geotiff( - path, gpu=True, chunks=2, mask_nodata=False).compute() + path, gpu=True, chunks=2, masked=False).compute() assert eager.dtype == np.uint16 assert dgpu.dtype == np.uint16 @@ -2705,8 +2705,8 @@ def test_cross_backend_parity_eager_vrt_2052( tif_path, arr = uint16_with_matching_sentinel_2052 vrt_path, _ = uint16_vrt_with_matching_sentinel_2052 - eager = open_geotiff(tif_path, mask_nodata=False) - vrt = open_geotiff(vrt_path, mask_nodata=False) + eager = open_geotiff(tif_path, masked=False) + vrt = open_geotiff(vrt_path, masked=False) assert eager.dtype == vrt.dtype == np.uint16 np.testing.assert_array_equal(eager.values, np.asarray(vrt.values)) diff --git a/xrspatial/geotiff/tests/gpu/test_reader.py b/xrspatial/geotiff/tests/gpu/test_reader.py index 2f625973c..4b6a2091d 100644 --- a/xrspatial/geotiff/tests/gpu/test_reader.py +++ b/xrspatial/geotiff/tests/gpu/test_reader.py @@ -615,8 +615,8 @@ def test_gpu_uint16_nodata_promoted_and_masked_tiled_1542(tmp_path): write(arr, path, nodata=65535, compression='deflate', tiled=True, tile_size=16) - cpu = open_geotiff(path) - gpu = open_geotiff(path, gpu=True) + cpu = open_geotiff(path, masked=True) + gpu = open_geotiff(path, gpu=True, masked=True) assert cpu.dtype == gpu.dtype == np.float64 assert cpu.attrs.get('nodata') == 65535.0 @@ -638,8 +638,8 @@ def test_gpu_uint16_nodata_promoted_and_masked_stripped_1542(tmp_path): path = str(tmp_path / 'gpu_u16_nodata_1542_stripped.tif') write(arr, path, nodata=65535, compression='deflate', tiled=False) - cpu = open_geotiff(path) - gpu = open_geotiff(path, gpu=True) + cpu = open_geotiff(path, masked=True) + gpu = open_geotiff(path, gpu=True, masked=True) assert cpu.dtype == gpu.dtype == np.float64 assert gpu.attrs.get('nodata') == 65535.0 @@ -718,10 +718,10 @@ def test_gpu_all_four_backends_agree_on_nodata_1542(tmp_path): write(arr, path, nodata=65535, compression='deflate', tiled=True, tile_size=16) - da_np = open_geotiff(path) - da_dask = open_geotiff(path, chunks=512) - da_gpu = open_geotiff(path, gpu=True) - da_gpu_dask = open_geotiff(path, gpu=True, chunks=512) + da_np = open_geotiff(path, masked=True) + da_dask = open_geotiff(path, chunks=512, masked=True) + da_gpu = open_geotiff(path, gpu=True, masked=True) + da_gpu_dask = open_geotiff(path, gpu=True, chunks=512, masked=True) for label, da in [('np', da_np), ('dask+np', da_dask), ('gpu', da_gpu), ('gpu+dask', da_gpu_dask)]: @@ -750,8 +750,8 @@ def test_gpu_int16_negative_nodata_1542(tmp_path): write(arr, path, nodata=-9999, compression='deflate', tiled=True, tile_size=16) - cpu = open_geotiff(path) - gpu = open_geotiff(path, gpu=True) + cpu = open_geotiff(path, masked=True) + gpu = open_geotiff(path, gpu=True, masked=True) assert cpu.dtype == gpu.dtype == np.float64 assert gpu.attrs.get('nodata') == -9999.0 np.testing.assert_array_equal(np.isnan(cpu.values), diff --git a/xrspatial/geotiff/tests/gpu/test_writer.py b/xrspatial/geotiff/tests/gpu/test_writer.py index bf9255e14..ff6de43d0 100644 --- a/xrspatial/geotiff/tests/gpu/test_writer.py +++ b/xrspatial/geotiff/tests/gpu/test_writer.py @@ -1454,9 +1454,9 @@ def test_gpu_writer_cog_overview_sentinel_roundtrip_1948(): ) # Read full-resolution and the two overview levels. - full = open_geotiff(path) - ov1 = open_geotiff(path, overview_level=1) - ov2 = open_geotiff(path, overview_level=2) + full = open_geotiff(path, masked=True) + ov1 = open_geotiff(path, overview_level=1, masked=True) + ov2 = open_geotiff(path, overview_level=2, masked=True) # Full-resolution: sentinel pixels survive as NaN (the read path # masks the sentinel back to NaN since attrs['nodata'] is set). diff --git a/xrspatial/geotiff/tests/integration/test_dask_pipeline.py b/xrspatial/geotiff/tests/integration/test_dask_pipeline.py index b87959ecb..3ca4b741b 100644 --- a/xrspatial/geotiff/tests/integration/test_dask_pipeline.py +++ b/xrspatial/geotiff/tests/integration/test_dask_pipeline.py @@ -125,7 +125,7 @@ def uint16_with_sentinel_only_in_corner_dask_int_nodata_chunks(tmp_path): def test_eager_promotes_to_float64_and_masks(uint16_with_sentinel_only_in_corner_dask_int_nodata_chunks): # noqa: E501 """Baseline: the eager path produces float64 with 4 NaNs.""" path, _ = uint16_with_sentinel_only_in_corner_dask_int_nodata_chunks - eager = open_geotiff(path) + eager = open_geotiff(path, masked=True) assert eager.dtype == np.float64 assert np.isnan(eager.values).sum() == 4 assert np.isnan(eager.values[6:8, 6:8]).all() @@ -139,8 +139,8 @@ def test_dask_chunks_4_matches_eager(uint16_with_sentinel_only_in_corner_dask_in chunk back to uint16 at concat time. """ path, _ = uint16_with_sentinel_only_in_corner_dask_int_nodata_chunks - eager = open_geotiff(path) - dk = open_geotiff(path, chunks=4) + eager = open_geotiff(path, masked=True) + dk = open_geotiff(path, masked=True, chunks=4) assert dk.dtype == np.float64 computed = dk.compute() assert computed.dtype == np.float64 @@ -160,7 +160,7 @@ def test_dask_chunks_2_per_chunk_dtype_uniform( as uint16 because the mask never matched there. """ path, _ = uint16_with_sentinel_only_in_corner_dask_int_nodata_chunks - dk = open_geotiff(path, chunks=2) + dk = open_geotiff(path, masked=True, chunks=2) blocks = dk.data.to_delayed().flatten() for i, block in enumerate(blocks): chunk = block.compute() @@ -323,8 +323,8 @@ def uint16_with_sentinel_in_first_chunk_dask_no_op_astype(tmp_path): def test_uint16_mask_path_still_promotes(uint16_with_sentinel_in_first_chunk_dask_no_op_astype): """The int-sentinel float64 promotion still runs when sentinels are present.""" path, arr = uint16_with_sentinel_in_first_chunk_dask_no_op_astype - eager = open_geotiff(path) - dk = open_geotiff(path, chunks=4) + eager = open_geotiff(path, masked=True) + dk = open_geotiff(path, masked=True, chunks=4) assert dk.dtype == np.float64 computed = dk.compute() assert computed.dtype == np.float64 @@ -740,7 +740,7 @@ def test_all_nan_with_sentinel(self, tmp_path): ) assert not np.isnan(raw).any() # Public read still maps the sentinel back to NaN. - result = open_geotiff(path) + result = open_geotiff(path, masked=True) assert np.isnan(result.values).all() assert result.attrs.get('nodata') == pytest.approx(-9999.0) @@ -797,7 +797,7 @@ def test_mixed_nan_plus_minus_inf(self, tmp_path): "found surviving NaN floats" ) # Public read maps the sentinel back to NaN, keeps Inf as-is. - result = open_geotiff(path) + result = open_geotiff(path, masked=True) assert np.isnan(result.values[0, 1]) assert np.isnan(result.values[2, 2]) assert result.values[1, 0] == np.inf diff --git a/xrspatial/geotiff/tests/parity/test_backend_matrix.py b/xrspatial/geotiff/tests/parity/test_backend_matrix.py index 546065b7f..5feeda32d 100644 --- a/xrspatial/geotiff/tests/parity/test_backend_matrix.py +++ b/xrspatial/geotiff/tests/parity/test_backend_matrix.py @@ -454,6 +454,7 @@ def _build_miniswhite(dir_path: Path, target: Path) -> Path: expected_masked=True, source_type=_SRC_LOCAL_TIFF, builder=_build_float32_with_nodata, + read_kwargs={"masked": True}, ), _FixtureSpec( fix_id="int8-unmasked", @@ -1790,19 +1791,19 @@ class _ApBackend: def _ap_open_eager(path): - return open_geotiff(path) + return open_geotiff(path, masked=True) def _ap_open_dask(path): - return open_geotiff(path, chunks=16) + return open_geotiff(path, chunks=16, masked=True) def _ap_open_gpu(path): - return open_geotiff(path, gpu=True) + return open_geotiff(path, gpu=True, masked=True) def _ap_open_dask_gpu(path): - return open_geotiff(path, gpu=True, chunks=16) + return open_geotiff(path, gpu=True, chunks=16, masked=True) def _ap_open_vrt(path, meta): @@ -1875,7 +1876,7 @@ def test_canonical_attrs_match_across_backends(tmp_path, fixture): path = str(tmp_path / f'attrs_parity_{fixture.name}.tif') meta = fixture.writer(path) - baseline = _ap_attrs_for_parity(open_geotiff(path).attrs) + baseline = _ap_attrs_for_parity(open_geotiff(path, masked=True).attrs) divergences = {} for backend in _AP_AVAILABLE_BACKENDS: @@ -1915,7 +1916,7 @@ def test_canonical_attrs_keys_match_across_backends(tmp_path, fixture): path = str(tmp_path / f'attrs_parity_keys_{fixture.name}.tif') meta = fixture.writer(path) - baseline_keys = set(_ap_attrs_for_parity(open_geotiff(path).attrs).keys()) + baseline_keys = set(_ap_attrs_for_parity(open_geotiff(path, masked=True).attrs).keys()) diffs = {} for backend in _AP_AVAILABLE_BACKENDS: diff --git a/xrspatial/geotiff/tests/parity/test_finalization.py b/xrspatial/geotiff/tests/parity/test_finalization.py index 853206e7e..b43a4a94e 100644 --- a/xrspatial/geotiff/tests/parity/test_finalization.py +++ b/xrspatial/geotiff/tests/parity/test_finalization.py @@ -564,7 +564,7 @@ def test_float_sentinel_match_and_mask(tmp_path): path = str(tmp_path / 'eager_parity_2179_float_sentinel.tif') _write_with_nodata(arr, path, nodata=-9999.0) - cpu, gpu = _read_both(path) + cpu, gpu = _read_both(path, masked=True) # dtype + masked_nodata first: float source stays at its declared # dtype on both backends; the mask substitutes NaN. @@ -592,7 +592,7 @@ def test_int_in_range_sentinel_promotes_to_float(tmp_path): path = str(tmp_path / 'eager_parity_2179_int_sentinel.tif') _write_with_nodata(arr, path, nodata=65535) - cpu, gpu = _read_both(path) + cpu, gpu = _read_both(path, masked=True) # Integer promotion fires on both backends. assert cpu.dtype == np.float64 diff --git a/xrspatial/geotiff/tests/parity/test_reference.py b/xrspatial/geotiff/tests/parity/test_reference.py index 96b89b662..93d1897a9 100644 --- a/xrspatial/geotiff/tests/parity/test_reference.py +++ b/xrspatial/geotiff/tests/parity/test_reference.py @@ -339,7 +339,7 @@ def nan_sentinel_path(self, tmp_path): def test_eager_path_baseline(self, nan_sentinel_path): """Baseline: eager path replaces the sentinel with NaN.""" path, _ = nan_sentinel_path - result = open_geotiff(path) + result = open_geotiff(path, masked=True) assert np.isnan(result.values[2, 2]) assert np.isnan(result.values[6, 0]) assert result.values[0, 0] == 0.0 # non-sentinel survives @@ -362,7 +362,7 @@ def test_dask_numpy_chunks_smaller_than_sentinel_block(self, nan_sentinel_path): exercises the per-block sentinel comparison. """ path, _ = nan_sentinel_path - dk = open_geotiff(path, chunks=2).compute() + dk = open_geotiff(path, chunks=2, masked=True).compute() assert np.isnan(dk.values[2, 2]) assert np.isnan(dk.values[3, 3]) assert np.isnan(dk.values[6, 0]) @@ -525,7 +525,7 @@ def _parity_check_single_band( ras_crs = ds.crs ras_nodata = ds.nodata - xrs = open_geotiff(path) + xrs = open_geotiff(path, masked=True) xrs_np = np.asarray(xrs) # Pixel parity. For integer rasters with nodata, xrspatial promotes to @@ -661,7 +661,7 @@ def test_round_trip(self, tmp_path): ras_crs = ds.crs ras_nodata = ds.nodata - xrs = open_geotiff(str(path)) # dims (y, x, band) + xrs = open_geotiff(str(path), masked=True) # dims (y, x, band) xrs_np = np.asarray(xrs) # xrspatial lays bands on the trailing axis; transpose for compare. diff --git a/xrspatial/geotiff/tests/read/test_basic.py b/xrspatial/geotiff/tests/read/test_basic.py index fe795e1f1..5dcb98345 100644 --- a/xrspatial/geotiff/tests/read/test_basic.py +++ b/xrspatial/geotiff/tests/read/test_basic.py @@ -650,7 +650,7 @@ def test_full_round_trip_preserves_nodata(self, tmp_path): ) # Read it (nodata=0 -> NaN) - da = open_geotiff(path1) + da = open_geotiff(path1, masked=True) assert np.isnan(da.values[1, 0]) assert da.attrs['nodata'] == 0 diff --git a/xrspatial/geotiff/tests/read/test_dtypes.py b/xrspatial/geotiff/tests/read/test_dtypes.py index 5c145b5c5..7934754a3 100644 --- a/xrspatial/geotiff/tests/read/test_dtypes.py +++ b/xrspatial/geotiff/tests/read/test_dtypes.py @@ -163,7 +163,7 @@ def test_int_with_nodata_float_to_int_raises(self, tmp_path): path = str(tmp_path / 'dtype_nodata_int_eager.tif') to_geotiff(da, path, compression='none') with pytest.raises(ValueError, match='float.*int'): - open_geotiff(path, dtype='int32') + open_geotiff(path, dtype='int32', masked=True) # --------------------------------------------------------------------------- @@ -201,7 +201,7 @@ def test_int_with_nodata_float_to_int_raises_dask(self, tmp_path): path = str(tmp_path / 'dtype_nodata_int_dask.tif') to_geotiff(da, path, compression='none') with pytest.raises(ValueError, match='float.*int'): - open_geotiff(path, dtype='int32', chunks=2) + open_geotiff(path, dtype='int32', chunks=2, masked=True) # --------------------------------------------------------------------------- diff --git a/xrspatial/geotiff/tests/release_gates/test_features.py b/xrspatial/geotiff/tests/release_gates/test_features.py index 5064125ed..397c2f030 100644 --- a/xrspatial/geotiff/tests/release_gates/test_features.py +++ b/xrspatial/geotiff/tests/release_gates/test_features.py @@ -152,7 +152,7 @@ def test_uint8_nodata_masked(self, tmp_path): path = str(tmp_path / 'uint8_nodata.tif') write(arr, path, compression='none', tiled=False, nodata=255) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert np.isnan(da.values[1, 1]) assert da.values[0, 1] == 1.0 assert da.dtype == np.float64 # promoted from uint8 @@ -162,7 +162,7 @@ def test_uint16_nodata_masked(self, tmp_path): path = str(tmp_path / 'uint16_nodata.tif') write(arr, path, compression='none', tiled=False, nodata=0) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert np.isnan(da.values[0, 1]) assert np.isnan(da.values[1, 1]) assert da.values[0, 0] == 100.0 @@ -172,7 +172,7 @@ def test_int16_nodata_negative(self, tmp_path): path = str(tmp_path / 'int16_nodata.tif') write(arr, path, compression='none', tiled=False, nodata=-9999) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert np.isnan(da.values[0, 0]) assert np.isnan(da.values[1, 1]) assert da.values[0, 1] == 10.0 @@ -936,7 +936,7 @@ def test_vrt_float64_fractional_nodata_masked(self, tmp_path): with open(vrt_path, 'w') as f: f.write(vrt_xml) - da = open_geotiff(vrt_path) + da = open_geotiff(vrt_path, masked=True) vals = da.values # The sentinel pixel must be NaN. diff --git a/xrspatial/geotiff/tests/release_gates/test_stable_features.py b/xrspatial/geotiff/tests/release_gates/test_stable_features.py index 99a4aa1e6..f452a1454 100644 --- a/xrspatial/geotiff/tests/release_gates/test_stable_features.py +++ b/xrspatial/geotiff/tests/release_gates/test_stable_features.py @@ -1023,7 +1023,16 @@ def test_release_gate_eager_dask_full_parity( f"`python -m xrspatial.geotiff.tests.golden_corpus.generate`" ) - eager = open_geotiff(str(path), **open_kwargs) + # ``read_geotiff_dask`` keeps the legacy masking-on default + # (``mask_nodata=True``); ``open_geotiff`` flipped its default to + # ``masked=False``. Mirror the dask default on the eager call so the + # two backends are compared under the same masking policy, while + # still honouring an explicit ``mask_nodata=`` in ``open_kwargs`` + # (the masked-nodata-lifecycle scenario). + eager_kwargs = {"masked": open_kwargs.get("mask_nodata", True), + **{k: v for k, v in open_kwargs.items() + if k != "mask_nodata"}} + eager = open_geotiff(str(path), **eager_kwargs) lazy = read_geotiff_dask( str(path), chunks=_EAGER_DASK_CHUNK_SIZE, **open_kwargs, ) @@ -1549,9 +1558,9 @@ def _overview_assert_transform_scales( def _overview_read_levels_eager(path: str) -> dict: - out = {0: open_geotiff(path)} + out = {0: open_geotiff(path, masked=True)} for i, _ in enumerate(_OVERVIEW_FACTORS, start=1): - out[i] = open_geotiff(path, overview_level=i) + out[i] = open_geotiff(path, masked=True, overview_level=i) return out diff --git a/xrspatial/geotiff/tests/test_edge_cases.py b/xrspatial/geotiff/tests/test_edge_cases.py index 67a4e4576..624a0282d 100644 --- a/xrspatial/geotiff/tests/test_edge_cases.py +++ b/xrspatial/geotiff/tests/test_edge_cases.py @@ -491,7 +491,7 @@ def test_crs_and_nodata_no_coords(self, tmp_path): path = str(tmp_path / 'both.tif') to_geotiff(arr, path, crs=4326, nodata=-9999.0, compression='none') - result = open_geotiff(path) + result = open_geotiff(path, masked=True) assert result.attrs['crs'] == 4326 assert np.isnan(result.values[0, 1]) assert result.values[1, 1] == 2.0 @@ -576,7 +576,7 @@ def test_nodata_string_numeric(self, tmp_path): path = str(tmp_path / 'nodata_str.tif') to_geotiff(da, path, compression='none') - result = open_geotiff(path) + result = open_geotiff(path, masked=True) # The nodata sentinel should be masked to NaN on read assert np.isnan(result.values[0, 1]) assert np.isnan(result.values[1, 0]) diff --git a/xrspatial/geotiff/tests/test_round_trip.py b/xrspatial/geotiff/tests/test_round_trip.py index 191ca4965..ad768f11e 100644 --- a/xrspatial/geotiff/tests/test_round_trip.py +++ b/xrspatial/geotiff/tests/test_round_trip.py @@ -76,7 +76,7 @@ def _read_write_read(da: xr.DataArray, tmp_path, tag: str) -> xr.DataArray: """Run one ``write -> read`` cycle on ``da`` and return the new DataArray.""" path = str(tmp_path / f"rt_{tag}_1986.tif") to_geotiff(da, path, compression='none', tiled=False) - return open_geotiff(path) + return open_geotiff(path, masked=True) # Canonical attrs whose values must lock across a write -> read cycle @@ -222,7 +222,7 @@ def test_int32_sentinel_promotes_and_masks(self, tmp_path): write(arr, path, nodata=-9999, geo_transform=_default_gt(), crs_epsg=4326, compression='none', tiled=False) - da1 = open_geotiff(path) + da1 = open_geotiff(path, masked=True) # Dtype drift: int -> float64 with NaN at sentinel. assert da1.dtype == np.float64 assert np.isnan(da1.values[1, 0]) @@ -246,7 +246,7 @@ def test_uint16_sentinel_promotes_and_masks(self, tmp_path): write(arr, path, nodata=65535, geo_transform=_default_gt(), crs_epsg=4326, compression='none', tiled=False) - da1 = open_geotiff(path) + da1 = open_geotiff(path, masked=True) assert da1.dtype == np.float64 assert np.isnan(da1.values[1, 0]) assert da1.attrs.get('nodata') == 65535 diff --git a/xrspatial/geotiff/tests/unit/test_ifd.py b/xrspatial/geotiff/tests/unit/test_ifd.py index eaf548cee..b9934a4c5 100644 --- a/xrspatial/geotiff/tests/unit/test_ifd.py +++ b/xrspatial/geotiff/tests/unit/test_ifd.py @@ -1010,7 +1010,7 @@ def test_sparse_tile_with_nodata_round_trips(self, tmp_path): path = str(tmp_path / 'sparse_nodata_2426.tif') _write_sparse_tiled(path, nodata=0) - arr = open_geotiff(path) + arr = open_geotiff(path, masked=True) arr_np = np.asarray(arr) assert arr_np[:64, :64].sum() == 64 * 64 * 100 assert np.all(np.isnan(arr_np[:64, 64:])) @@ -1046,7 +1046,7 @@ def test_sparse_strip_with_nodata(self, tmp_path): path = str(tmp_path / 'sparse_strips_2426.tif') _write_sparse_stripped_small(path, nodata=0) - arr = open_geotiff(path) + arr = open_geotiff(path, masked=True) arr_np = np.asarray(arr) assert arr_np[:32, :].sum() == 32 * 128 * 200 assert np.all(np.isnan(arr_np[32:, :])) @@ -1060,7 +1060,7 @@ def test_sparse_tile_gpu_round_trip(self, tmp_path): path = str(tmp_path / 'sparse_gpu_2426.tif') _write_sparse_tiled(path, nodata=0) - arr = open_geotiff(path, gpu=True) + arr = open_geotiff(path, gpu=True, masked=True) # GPU read applies the high-level nodata mask: the # source uint16 raster is promoted to float64 and sentinel # values become NaN, matching the CPU eager path. diff --git a/xrspatial/geotiff/tests/unit/test_metadata.py b/xrspatial/geotiff/tests/unit/test_metadata.py index f0a1e6990..4eaee4b86 100644 --- a/xrspatial/geotiff/tests/unit/test_metadata.py +++ b/xrspatial/geotiff/tests/unit/test_metadata.py @@ -1099,7 +1099,7 @@ def test_uint16_with_nodata_promotes_to_float64(self, tmp_path): path = str(tmp_path / 'u16_nodata_1484.tif') write(arr, path, nodata=65535, compression='none', tiled=False) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert da.dtype == np.float64 assert np.isnan(da.values[1, 0]) np.testing.assert_array_equal( @@ -1112,7 +1112,7 @@ def test_uint16_with_nodata_dtype_uint16_raises(self, tmp_path): path = str(tmp_path / 'u16_nodata_cast_1484.tif') write(arr, path, nodata=65535, compression='none', tiled=False) with pytest.raises(ValueError, match='float.*int'): - open_geotiff(path, dtype='uint16') + open_geotiff(path, dtype='uint16', masked=True) def test_uint16_no_nodata_keeps_dtype(self, tmp_path): arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.uint16) diff --git a/xrspatial/geotiff/tests/unit/test_photometric.py b/xrspatial/geotiff/tests/unit/test_photometric.py index 2bfef535e..e0758df11 100644 --- a/xrspatial/geotiff/tests/unit/test_photometric.py +++ b/xrspatial/geotiff/tests/unit/test_photometric.py @@ -270,7 +270,7 @@ def test_eager_numpy_miniswhite_nodata( path = str(tmp_path / "mw_eager.tif") _write_miniswhite_tiff(path, stored, nodata_str) - arr = open_geotiff(path) + arr = open_geotiff(path, masked=True) assert arr.attrs["nodata"] == sentinel np.testing.assert_array_equal(arr.values, expected) @@ -290,7 +290,7 @@ def test_dask_miniswhite_nodata(tmp_path, case_factory, nodata_str, sentinel): path = str(tmp_path / "mw_dask.tif") _write_miniswhite_tiff(path, stored, nodata_str) - arr = open_geotiff(path, chunks=2).compute() + arr = open_geotiff(path, chunks=2, masked=True).compute() assert arr.attrs["nodata"] == sentinel np.testing.assert_array_equal(arr.values, expected) @@ -308,7 +308,7 @@ def test_eager_miniswhite_uint8_no_collision(tmp_path): [[np.nan, 155.0, 55.0], [205.0, np.nan, 25.0]], dtype=np.float64 ) - arr = open_geotiff(path) + arr = open_geotiff(path, masked=True) np.testing.assert_array_equal(arr.values, expected) @@ -332,13 +332,13 @@ def test_miniswhite_backend_parity_uint8_nodata_zero(tmp_path): path = str(tmp_path / "mw_parity.tif") _write_miniswhite_tiff(path, stored, "0", tiled=True) - eager = open_geotiff(path).values - dask_result = open_geotiff(path, chunks=2).compute().values + eager = open_geotiff(path, masked=True).values + dask_result = open_geotiff(path, chunks=2, masked=True).compute().values np.testing.assert_array_equal(eager, expected) np.testing.assert_array_equal(dask_result, expected) np.testing.assert_array_equal(eager, dask_result) if _HAS_GPU: - gpu = open_geotiff(path, gpu=True).data.get() + gpu = open_geotiff(path, gpu=True, masked=True).data.get() np.testing.assert_array_equal(gpu, expected) np.testing.assert_array_equal(eager, gpu) @@ -349,7 +349,7 @@ def test_gpu_eager_miniswhite_uint8_nodata_zero(tmp_path): path = str(tmp_path / "mw_uint8_gpu.tif") _write_miniswhite_tiff(path, stored, "0", tiled=True) - arr = open_geotiff(path, gpu=True) + arr = open_geotiff(path, gpu=True, masked=True) assert arr.attrs["nodata"] == 0 np.testing.assert_array_equal(arr.data.get(), expected) @@ -375,7 +375,7 @@ def test_gpu_sparse_tile_miniswhite_nodata_zero(tmp_path): _write_miniswhite_tiff(path, stored, "0", tiled=True) _patch_first_tile_sparse(path) - arr = open_geotiff(path, gpu=True) + arr = open_geotiff(path, gpu=True, masked=True) out = arr.data.get() assert np.all(np.isnan(out[:16, :16])) @@ -433,7 +433,7 @@ def test_miniswhite_float_with_nodata_round_trips_nan(tmp_path): arr = np.array([[10.0, np.nan, 20.0, 30.0]], dtype=np.float32) path = tmp_path / "rt_float_nd.tif" to_geotiff(_da(arr), str(path), photometric='miniswhite', nodata=-9999.0) - r = open_geotiff(str(path)) + r = open_geotiff(str(path), masked=True) out = np.asarray(r.values) assert np.isnan(out[0, 1]), ( "nodata position must round-trip back to NaN after MinIsWhite " @@ -458,7 +458,7 @@ def test_miniswhite_uint16_in_range_nodata_round_trips_nan(tmp_path): 'assume_square_pixels_for_degenerate_axis': True}, ) to_geotiff(da_in, str(path), photometric='miniswhite', nodata=9999) - r = open_geotiff(str(path)) + r = open_geotiff(str(path), masked=True) out = np.asarray(r.values) assert np.isnan(out[0, 1]), ( f"in-range uint nodata must round-trip to NaN through the " diff --git a/xrspatial/geotiff/tests/vrt/test_metadata.py b/xrspatial/geotiff/tests/vrt/test_metadata.py index 4c5a30c28..5e6a7684b 100644 --- a/xrspatial/geotiff/tests/vrt/test_metadata.py +++ b/xrspatial/geotiff/tests/vrt/test_metadata.py @@ -218,7 +218,7 @@ def test_vrt_eager_float_source_mask_off_reports_false(tmp_path): """Eager VRT + float source + ``mask_nodata=False`` must report ``masked_nodata=False``. Pre-fix rule (dtype alone) said ``True``.""" vrt = _masked_nodata_attr_write_float_vrt(tmp_path, 'tmp_2159_eager_float_src.tif', 'tmp_2159_eager_unmasked.vrt') # noqa: E501 - out = open_geotiff(vrt, mask_nodata=False) + out = open_geotiff(vrt, masked=False) assert out.attrs.get('nodata') == -9999.0 assert out.attrs.get('masked_nodata') is False, f"caller opted out of masking but attrs say masked_nodata={out.attrs.get('masked_nodata')!r}" # noqa: E501 @@ -227,7 +227,7 @@ def test_vrt_eager_float_source_mask_on_reports_true(tmp_path): """Canonical direction: float source + masking on. The masking step runs, attr says True. Regression guard.""" vrt = _masked_nodata_attr_write_float_vrt(tmp_path, 'tmp_2159_eager_float_src_masked.tif', 'tmp_2159_eager_masked.vrt') # noqa: E501 - out = open_geotiff(vrt) + out = open_geotiff(vrt, masked=True) assert out.attrs.get('nodata') == -9999.0 assert out.attrs.get('masked_nodata') is True @@ -238,7 +238,7 @@ def test_vrt_eager_int_source_mask_off_reports_false(tmp_path): got this right (int dtype -> False); keep it green under the new ``mask_nodata and dtype.kind == 'f'`` rule.""" vrt = _masked_nodata_attr_write_int_vrt(tmp_path, 'tmp_2159_eager_int_src.tif', 'tmp_2159_eager_int_unmasked.vrt') # noqa: E501 - out = open_geotiff(vrt, mask_nodata=False) + out = open_geotiff(vrt, masked=False) assert out.dtype.kind == 'i' assert out.attrs.get('masked_nodata') is False @@ -250,7 +250,7 @@ def test_vrt_eager_float_source_mask_off_with_cast_reports_false(tmp_path): ``mask_nodata=False`` and says False. The caller-supplied cast is still recorded via ``nodata_dtype_cast``.""" vrt = _masked_nodata_attr_write_float_vrt(tmp_path, 'tmp_2159_eager_float_src_cast.tif', 'tmp_2159_eager_unmasked_cast.vrt') # noqa: E501 - out = open_geotiff(vrt, mask_nodata=False, dtype=np.float64) + out = open_geotiff(vrt, masked=False, dtype=np.float64) assert out.dtype == np.float64 assert out.attrs.get('masked_nodata') is False assert out.attrs.get('nodata_dtype_cast') == 'float64' @@ -301,7 +301,7 @@ def test_vrt_attr_matches_dask_backend_under_mask_off(tmp_path): cross-backend invariant the contract at ``_attrs._set_nodata_attrs`` calls out.""" vrt = _masked_nodata_attr_write_float_vrt(tmp_path, 'tmp_2159_xbackend_src.tif', 'tmp_2159_xbackend.vrt') # noqa: E501 - eager = open_geotiff(vrt, mask_nodata=False, dtype=np.float64) + eager = open_geotiff(vrt, masked=False, dtype=np.float64) chunked = read_geotiff_dask(vrt, chunks=2, mask_nodata=False, dtype=np.float64) assert eager.attrs.get('masked_nodata') is False assert chunked.attrs.get('masked_nodata') is False @@ -504,7 +504,7 @@ def test_vrt_uint16_nodata_promotes_to_float64(tmp_path): """VRT route NaN-masks integer-with-nodata, matching open_geotiff.""" tif = str(tmp_path / 'src_1564.tif') _int_nodata_write_uint16_with_nodata_tif(tif, sentinel=65535) - eager = open_geotiff(tif) + eager = open_geotiff(tif, masked=True) assert eager.dtype == np.float64 assert np.isnan(eager.values[1, 0]) vrt_path = str(tmp_path / 'src_1564.vrt') @@ -1378,12 +1378,12 @@ def _metadata_parity_build_integer_with_nodata_vrt(tmp_path: pathlib.Path) -> st def _metadata_parity_read_eager_numpy(vrt_path: str): """Eager numpy via the dispatcher (mirrors public surface).""" - return open_geotiff(vrt_path) + return open_geotiff(vrt_path, masked=True) def _metadata_parity_read_dask(vrt_path: str): """Dask via the dispatcher, then ``compute()`` for value parity.""" - lazy = open_geotiff(vrt_path, chunks=2) + lazy = open_geotiff(vrt_path, chunks=2, masked=True) return lazy.compute() diff --git a/xrspatial/geotiff/tests/write/test_basic.py b/xrspatial/geotiff/tests/write/test_basic.py index 09a79819b..c39f46bd8 100644 --- a/xrspatial/geotiff/tests/write/test_basic.py +++ b/xrspatial/geotiff/tests/write/test_basic.py @@ -280,7 +280,7 @@ def test_nodata_sentinel_float_disk_vs_read(tmp_path): np.testing.assert_array_equal(raw[~nan_mask], expected[~nan_mask]) # Read back through open_geotiff: sentinel becomes NaN again. - out = open_geotiff(path) + out = open_geotiff(path, masked=True) np.testing.assert_array_equal(np.isnan(out.data), nan_mask) np.testing.assert_array_equal(out.data[~nan_mask], expected[~nan_mask]) assert out.attrs.get('nodata') == -9999.0 @@ -303,7 +303,7 @@ def test_nodata_uint8_sentinel(tmp_path): np.testing.assert_array_equal(raw, arr) # Read-back: open_geotiff promotes integer with nodata to float + NaN. - out = open_geotiff(path) + out = open_geotiff(path, masked=True) assert out.dtype.kind == 'f' assert np.isnan(out.data[0, 0]) assert np.isnan(out.data[4, 4]) diff --git a/xrspatial/geotiff/tests/write/test_nodata.py b/xrspatial/geotiff/tests/write/test_nodata.py index ccacaf7ad..380a6babc 100644 --- a/xrspatial/geotiff/tests/write/test_nodata.py +++ b/xrspatial/geotiff/tests/write/test_nodata.py @@ -352,7 +352,7 @@ def _write(self, tmp_path, dtype, sentinel): def test_uint64_max_masked_to_nan(self, tmp_path): path = self._write(str(tmp_path), np.uint64, 2**64 - 1) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert da.dtype == np.float64 assert np.isnan(da.values[0, 0]) assert da.values[1, 1] == 100.0 @@ -361,7 +361,7 @@ def test_uint64_max_masked_to_nan(self, tmp_path): def test_int64_max_masked_to_nan(self, tmp_path): path = self._write(str(tmp_path), np.int64, 2**63 - 1) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert da.dtype == np.float64 assert np.isnan(da.values[0, 0]) assert da.values[1, 1] == 100.0 @@ -372,7 +372,7 @@ def test_int64_min_masked_to_nan(self, tmp_path): # and worked before the fix. Make sure the new int-first path # has not broken it. path = self._write(str(tmp_path), np.int64, -(2**63)) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert da.dtype == np.float64 assert np.isnan(da.values[0, 0]) assert da.values[1, 1] == 100.0 @@ -381,7 +381,7 @@ def test_int64_min_masked_to_nan(self, tmp_path): def test_uint16_max_still_masked(self, tmp_path): # Regression guard: small integer sentinels still work. path = self._write(str(tmp_path), np.uint16, 65535) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert da.dtype == np.float64 assert np.isnan(da.values[0, 0]) assert da.values[1, 1] == 100.0 @@ -390,7 +390,7 @@ def test_uint16_max_still_masked(self, tmp_path): def test_int32_negative_still_masked(self, tmp_path): # Regression guard: signed-int small sentinels still work. path = self._write(str(tmp_path), np.int32, -9999) - da = open_geotiff(path) + da = open_geotiff(path, masked=True) assert da.dtype == np.float64 assert np.isnan(da.values[0, 0]) assert da.attrs["nodata"] == -9999 @@ -402,7 +402,7 @@ def test_float_nodata_still_parses(self, tmp_path): da = xr.DataArray(arr, dims=("y", "x")) path = os.path.join(str(tmp_path), "f.tif") to_geotiff(da, path, nodata=-9999.0) - out = open_geotiff(path) + out = open_geotiff(path, masked=True) assert np.isnan(out.values[0, 0]) @@ -579,7 +579,7 @@ def test_open_geotiff_uint16_in_range_nodata_still_masks(tmp_path): da = xr.DataArray(arr, dims=['y', 'x']) path = str(tmp_path / 'uint16_in_range_nodata.tif') to_geotiff(da, path, crs=4326, nodata=65535) - result = open_geotiff(path) + result = open_geotiff(path, masked=True) assert result.dtype == np.float64 # The 65535 pixel should be NaN; the rest unchanged. assert np.isnan(result.values[1, 2]) @@ -652,7 +652,7 @@ def test_regression_dtype_uint16_was_unreachable( """ path, _ = uint16_with_matching_sentinel with pytest.raises(ValueError): - open_geotiff(path, dtype='uint16') + open_geotiff(path, dtype='uint16', masked=True) def test_mask_nodata_false_preserves_uint16(uint16_with_matching_sentinel): @@ -677,14 +677,26 @@ def test_mask_nodata_false_no_dtype_kwarg(uint16_with_matching_sentinel): def test_default_mask_nodata_true_still_promotes( uint16_with_matching_sentinel): - """Default ``mask_nodata=True`` keeps the existing behaviour.""" + """The new default (``masked=False``) does NOT promote or mask. + + ``open_geotiff`` now matches rioxarray's ``open_rasterio``: a bare + read keeps the source integer dtype and leaves the sentinel pixels + untouched. Passing ``masked=True`` restores the old promote-to-NaN + behaviour. + """ path, _ = uint16_with_matching_sentinel + # Default: no masking -> uint16 preserved, sentinels survive. da = open_geotiff(path) - assert da.dtype == np.float64 - assert np.isnan(da.values).sum() == 4 - # Sentinel positions should be NaN. - assert np.isnan(da.values[0, 0]) - assert np.isnan(da.values[1, 2]) + assert da.dtype == np.uint16 + assert not np.isnan(np.asarray(da.values, dtype=np.float64)).any() + assert da.attrs.get('masked_nodata') in (False, None) + + # Opt in: masking promotes to float64 and replaces the sentinel. + masked = open_geotiff(path, masked=True) + assert masked.dtype == np.float64 + assert np.isnan(masked.values).sum() == 4 + assert np.isnan(masked.values[0, 0]) + assert np.isnan(masked.values[1, 2]) def test_no_match_both_modes_agree(uint16_no_match): @@ -748,12 +760,25 @@ def test_dask_path_mask_nodata_false(uint16_with_matching_sentinel): def test_dask_path_default_still_promotes(uint16_with_matching_sentinel): - """The dask default (``mask_nodata=True``) still promotes to float64.""" + """The dask default (``masked=False``) does NOT promote to float64. + + Mirrors the eager-path new default: a bare chunked read keeps the + uint16 source dtype and leaves the sentinel pixels untouched. + ``masked=True`` restores the promote-to-NaN behaviour. + """ path, _ = uint16_with_matching_sentinel + # Default: no masking -> uint16 preserved on the dask graph. da = open_geotiff(path, chunks=2) - assert da.dtype == np.float64 + assert da.dtype == np.uint16 computed = da.compute() - assert np.isnan(computed.values).sum() == 4 + assert computed.dtype == np.uint16 + assert not np.isnan( + np.asarray(computed.values, dtype=np.float64)).any() + + # Opt in: masking promotes to float64 and replaces the sentinel. + masked = open_geotiff(path, chunks=2, masked=True) + assert masked.dtype == np.float64 + assert np.isnan(masked.compute().values).sum() == 4 def test_dask_dtype_cast_with_opt_out(uint16_with_matching_sentinel): diff --git a/xrspatial/geotiff/tests/write/test_overview.py b/xrspatial/geotiff/tests/write/test_overview.py index 878bfef17..db162949b 100644 --- a/xrspatial/geotiff/tests/write/test_overview.py +++ b/xrspatial/geotiff/tests/write/test_overview.py @@ -381,7 +381,7 @@ def test_cpu_cog_overview_mean_partial_block(tmp_path): tiled=True, tile_size=16, overview_levels=[2], overview_resampling='mean') - ov = open_geotiff(p, overview_level=1) + ov = open_geotiff(p, overview_level=1, masked=True) # Top-left 2x2 was all-NaN -> reduces to NaN -> rewritten to -9999 # on disk, then read back as NaN once overview-nodata # inheritance restores attrs['nodata'] and re-masks @@ -960,11 +960,11 @@ def test_to_geotiff_int_cubic_overview_round_trip(tmp_path): to_geotiff(da, str(path), cog=True, overview_resampling='cubic', nodata=-9999, crs=4326) # Level 0: full resolution. - r0 = open_geotiff(str(path), overview_level=0) + r0 = open_geotiff(str(path), overview_level=0, masked=True) uniq_0 = set(np.unique(r0.values[~np.isnan(r0.values)])) assert uniq_0 == {100.0} # Level 1: the historically poisoned level. - r1 = open_geotiff(str(path), overview_level=1) + r1 = open_geotiff(str(path), overview_level=1, masked=True) finite_1 = r1.values[~np.isnan(r1.values)] # All finite values must be 100 (the only valid data value); no ringing. np.testing.assert_array_equal(finite_1, 100.0) @@ -1001,8 +1001,8 @@ def test_to_geotiff_int_cubic_overview_matches_mean_finite_range(tmp_path): nodata=65535, crs=4326) to_geotiff(da, str(mean_path), cog=True, overview_resampling='mean', nodata=65535, crs=4326) - r_cubic = open_geotiff(str(cubic_path), overview_level=0) - r_mean = open_geotiff(str(mean_path), overview_level=0) + r_cubic = open_geotiff(str(cubic_path), overview_level=0, masked=True) + r_mean = open_geotiff(str(mean_path), overview_level=0, masked=True) # Sentinel masks should land on the same pixels for both methods on a # constant valid region with a constant nodata corner. np.testing.assert_array_equal( @@ -1880,7 +1880,7 @@ def test_overview_sentinel_pixels_masked_to_nan(tmp_path, backend_kwargs): expected_nan_counts = {0: 256, 1: 64, 2: 16} for lvl, expected in expected_nan_counts.items(): - da = open_geotiff(path, overview_level=lvl, **backend_kwargs) + da = open_geotiff(path, overview_level=lvl, masked=True, **backend_kwargs) vals = _materialise(da) actual_nan = int(np.isnan(vals).sum()) sentinel_remaining = int((vals == -9999.0).sum()) @@ -1908,7 +1908,7 @@ def test_overview_nanmean_matches_pre_sentinel_value(tmp_path, backend_kwargs): _make_cog_with_nodata(path) for lvl in (0, 1, 2): - da = open_geotiff(path, overview_level=lvl, **backend_kwargs) + da = open_geotiff(path, overview_level=lvl, masked=True, **backend_kwargs) vals = _materialise(da) assert np.nanmean(vals) == pytest.approx(100.0), ( f"backend={backend_kwargs}, overview_level={lvl}: nanmean=" diff --git a/xrspatial/geotiff/tests/write/test_streaming.py b/xrspatial/geotiff/tests/write/test_streaming.py index 750893ae1..bdc9c416d 100644 --- a/xrspatial/geotiff/tests/write/test_streaming.py +++ b/xrspatial/geotiff/tests/write/test_streaming.py @@ -367,7 +367,7 @@ def test_nan_to_nodata(self, tmp_path): path = str(tmp_path / 'nan_1084.tif') to_geotiff(dask_da, path) - result = open_geotiff(path) + result = open_geotiff(path, masked=True) assert np.isnan(result.values[15, 15]) assert result.values[0, 0] == pytest.approx(1.0) From 30bad5da519a81663cecb42af307fc975304d4e6 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 5 Jun 2026 07:39:54 -0700 Subject: [PATCH 4/6] Add rioxarray-compat tests for open_geotiff (#2961) --- .../tests/read/test_rioxarray_compat_2961.py | 238 ++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 xrspatial/geotiff/tests/read/test_rioxarray_compat_2961.py diff --git a/xrspatial/geotiff/tests/read/test_rioxarray_compat_2961.py b/xrspatial/geotiff/tests/read/test_rioxarray_compat_2961.py new file mode 100644 index 000000000..1221b143b --- /dev/null +++ b/xrspatial/geotiff/tests/read/test_rioxarray_compat_2961.py @@ -0,0 +1,238 @@ +"""rioxarray ``open_rasterio`` compatibility for ``open_geotiff`` (#2961). + +Covers the renamed parameters and the masking-off default flip: + +* ``masked`` (canonical) <- ``mask_nodata`` (deprecated alias), default + flipped from True to False to match rioxarray. +* ``default_name`` (canonical) <- ``name`` (deprecated alias). +* ``mask_and_scale`` (new): apply GDAL SCALE/OFFSET + mask. +* ``parse_coordinates`` (new): skip x/y coords. +* ``lock`` / ``cache`` (new, accept-and-warn shims). +* GPU / VRT gating for ``mask_and_scale`` / ``parse_coordinates=False``. +""" +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff, write_vrt +from xrspatial.geotiff._runtime import GeoTIFFFallbackWarning + + +def _int_sentinel_tiff(path, sentinel=255): + """uint8 raster with one pixel equal to ``sentinel`` declared nodata.""" + data = np.array([[1, 2, sentinel], [4, 5, 6]], dtype=np.uint8) + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": [0.5, 1.5], "x": [0.5, 1.5, 2.5]}, + attrs={"nodata": sentinel, "crs": 4326}, + ) + to_geotiff(da, path) + return path + + +def _scale_offset_tiff(path, scale=2.0, offset=10.0, sentinel=255): + """uint8 raster carrying GDAL SCALE/OFFSET metadata + a nodata pixel.""" + data = np.array([[1, 2, 3], [4, 5, sentinel]], dtype=np.uint8) + da = xr.DataArray( + data, + dims=("y", "x"), + coords={"y": [0.5, 1.5], "x": [0.5, 1.5, 2.5]}, + attrs={ + "nodata": sentinel, + "crs": 4326, + "gdal_metadata": {"SCALE": str(scale), "OFFSET": str(offset)}, + }, + ) + to_geotiff(da, path) + return path + + +# --------------------------------------------------------------------------- +# masked default flip + mask_nodata deprecation alias +# --------------------------------------------------------------------------- + +def test_default_does_not_mask(tmp_path): + """A bare read leaves the sentinel in place (rioxarray masked=False).""" + path = _int_sentinel_tiff(str(tmp_path / "t2961_default.tif")) + out = open_geotiff(path) + assert out.dtype == np.uint8 + assert (out.data == 255).any() + assert not np.isnan(out.data.astype(float)).any() + assert out.attrs.get("masked_nodata") is False + # The raw sentinel is still on attrs either way. + assert out.attrs.get("nodata") == 255 + + +def test_masked_true_promotes_and_masks(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_masked.tif")) + out = open_geotiff(path, masked=True) + assert out.dtype == np.float64 + assert np.isnan(out.data).sum() == 1 + assert out.attrs.get("masked_nodata") is True + + +def test_mask_nodata_alias_warns_and_matches(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_alias.tif")) + with pytest.warns(DeprecationWarning, match="mask_nodata.*deprecated"): + legacy = open_geotiff(path, mask_nodata=True) + canonical = open_geotiff(path, masked=True) + np.testing.assert_array_equal( + np.isnan(legacy.data), np.isnan(canonical.data)) + assert legacy.dtype == canonical.dtype == np.float64 + + +def test_masked_and_mask_nodata_both_raises(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_both.tif")) + with pytest.raises(TypeError, match="either 'masked' or"): + open_geotiff(path, masked=True, mask_nodata=True) + + +def test_canonical_masked_false_emits_no_warning(tmp_path, recwarn): + path = _int_sentinel_tiff(str(tmp_path / "t2961_nowarn.tif")) + open_geotiff(path, masked=False) + assert not [w for w in recwarn.list + if issubclass(w.category, DeprecationWarning)] + + +# --------------------------------------------------------------------------- +# default_name / name deprecation alias +# --------------------------------------------------------------------------- + +def test_default_name_sets_array_name(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_name.tif")) + out = open_geotiff(path, default_name="elevation") + assert out.name == "elevation" + + +def test_name_alias_warns_and_matches(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_name_alias.tif")) + with pytest.warns(DeprecationWarning, match="name.*deprecated"): + out = open_geotiff(path, name="elevation") + assert out.name == "elevation" + + +def test_default_name_and_name_both_raises(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_name_both.tif")) + with pytest.raises(TypeError, match="either 'default_name' or"): + open_geotiff(path, default_name="a", name="b") + + +# --------------------------------------------------------------------------- +# mask_and_scale +# --------------------------------------------------------------------------- + +def test_mask_and_scale_eager(tmp_path): + path = _scale_offset_tiff(str(tmp_path / "t2961_ms_eager.tif")) + out = open_geotiff(path, mask_and_scale=True) + assert out.dtype.kind == "f" + # data * 2 + 10, sentinel pixel -> NaN + expected = np.array([[12.0, 14.0, 16.0], [18.0, 20.0, np.nan]]) + np.testing.assert_array_equal(out.data, expected) + assert out.attrs.get("scale_factor") == 2.0 + assert out.attrs.get("add_offset") == 10.0 + + +def test_mask_and_scale_dask_matches_eager(tmp_path): + path = _scale_offset_tiff(str(tmp_path / "t2961_ms_dask.tif")) + eager = open_geotiff(path, mask_and_scale=True) + lazy = open_geotiff(path, mask_and_scale=True, chunks=2) + np.testing.assert_array_equal(eager.data, lazy.compute().data) + assert lazy.attrs.get("scale_factor") == 2.0 + + +def test_mask_and_scale_no_metadata_is_noop(tmp_path): + """A source with no SCALE/OFFSET keeps raw values (scale 1, offset 0).""" + path = _int_sentinel_tiff(str(tmp_path / "t2961_ms_noop.tif")) + out = open_geotiff(path, mask_and_scale=True) + # sentinel still masked, but values otherwise unscaled + assert out.data[0, 0] == 1.0 + assert np.isnan(out.data[0, 2]) + assert "scale_factor" not in out.attrs + + +def test_mask_and_scale_int_dtype_raises(tmp_path): + path = _scale_offset_tiff(str(tmp_path / "t2961_ms_int.tif")) + with pytest.raises(ValueError): + open_geotiff(path, mask_and_scale=True, dtype="uint8") + + +# --------------------------------------------------------------------------- +# parse_coordinates +# --------------------------------------------------------------------------- + +def test_parse_coordinates_false_drops_xy_keeps_attrs(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_pc_eager.tif")) + out = open_geotiff(path, parse_coordinates=False) + assert "x" not in out.coords + assert "y" not in out.coords + assert "transform" in out.attrs + assert "crs" in out.attrs + + +def test_parse_coordinates_true_default_has_xy(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_pc_default.tif")) + out = open_geotiff(path) + assert "x" in out.coords + assert "y" in out.coords + + +def test_parse_coordinates_false_dask(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_pc_dask.tif")) + out = open_geotiff(path, parse_coordinates=False, chunks=2) + assert "x" not in out.coords + assert "transform" in out.attrs + + +# --------------------------------------------------------------------------- +# lock / cache accept-and-warn shims +# --------------------------------------------------------------------------- + +def test_lock_emits_fallback_warning(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_lock.tif")) + with pytest.warns(GeoTIFFFallbackWarning, match="lock.*cache"): + out = open_geotiff(path, lock=object()) + assert out.dtype == np.uint8 + + +def test_cache_false_emits_fallback_warning(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_cache.tif")) + with pytest.warns(GeoTIFFFallbackWarning, match="lock.*cache"): + open_geotiff(path, cache=False) + + +def test_default_lock_cache_no_warning(tmp_path, recwarn): + path = _int_sentinel_tiff(str(tmp_path / "t2961_lc_default.tif")) + open_geotiff(path) + assert not [w for w in recwarn.list + if issubclass(w.category, GeoTIFFFallbackWarning)] + + +# --------------------------------------------------------------------------- +# GPU / VRT gating for the new behavioral options +# --------------------------------------------------------------------------- + +def test_mask_and_scale_gpu_rejected(tmp_path): + path = _scale_offset_tiff(str(tmp_path / "t2961_gate_gpu.tif")) + with pytest.raises(ValueError, match="mask_and_scale.*gpu=True"): + open_geotiff(path, mask_and_scale=True, gpu=True) + + +def test_parse_coordinates_false_gpu_rejected(tmp_path): + path = _int_sentinel_tiff(str(tmp_path / "t2961_gate_gpu_pc.tif")) + with pytest.raises(ValueError, match="parse_coordinates=False.*gpu=True"): + open_geotiff(path, parse_coordinates=False, gpu=True) + + +def test_mask_and_scale_vrt_rejected(tmp_path): + src = _int_sentinel_tiff(str(tmp_path / "t2961_gate_vrt_src.tif")) + vrt = write_vrt(str(tmp_path / "t2961_gate.vrt"), source_files=[src]) + with pytest.raises(ValueError, match="mask_and_scale.*.vrt"): + open_geotiff(vrt, mask_and_scale=True) + + +def test_parse_coordinates_false_vrt_rejected(tmp_path): + src = _int_sentinel_tiff(str(tmp_path / "t2961_gate_vrt_pc_src.tif")) + vrt = write_vrt(str(tmp_path / "t2961_gate_pc.vrt"), source_files=[src]) + with pytest.raises(ValueError, match="parse_coordinates=False.*.vrt"): + open_geotiff(vrt, parse_coordinates=False) From 43f7c12f8ffe0620f29da20c4823fa7fefe43920 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 5 Jun 2026 07:41:40 -0700 Subject: [PATCH 5/6] Update geotiff docs for masked default flip and rename (#2961) --- docs/source/reference/geotiff.rst | 11 ++++++----- docs/source/reference/release_gate_geotiff.rst | 4 ++-- docs/source/user_guide/attrs_contract.rst | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/source/reference/geotiff.rst b/docs/source/reference/geotiff.rst index a8b4ed70b..bf516fd12 100644 --- a/docs/source/reference/geotiff.rst +++ b/docs/source/reference/geotiff.rst @@ -181,11 +181,12 @@ this section is the brief. (``xrspatial/geotiff/tests/write/test_nodata.py``). * Float nodata. The on-disk sentinel is recorded on ``attrs['nodata']`` and surfaces as NaN in pixel data only when the - read promotes via ``mask_nodata=True`` (the default for float - outputs). With ``mask_nodata=False`` the raw float sentinel passes - through, so downstream callers can branch on the exact value; - ``xrspatial/geotiff/tests/write/test_nodata.py`` pins this - split. + read promotes via ``masked=True``. The default is ``masked=False`` + (matching rioxarray's ``open_rasterio``), so by default the raw float + sentinel passes through and downstream callers can branch on the exact + value; pass ``masked=True`` to get NaN-masked output. + ``xrspatial/geotiff/tests/write/test_nodata.py`` pins this split. + (``mask_nodata`` is a deprecated alias of ``masked``.) * NaN nodata. A file that declares ``nodata=NaN`` is read with NaN in both ``attrs['nodata']`` and pixel data (NaN propagates either way). * ``attrs['masked_nodata']``. Every read sets a boolean lifecycle diff --git a/docs/source/reference/release_gate_geotiff.rst b/docs/source/reference/release_gate_geotiff.rst index c14cee926..c56457709 100644 --- a/docs/source/reference/release_gate_geotiff.rst +++ b/docs/source/reference/release_gate_geotiff.rst @@ -199,7 +199,7 @@ Local GeoTIFF read and write keys (``transform``, ``crs``, ``crs_wkt``, ``nodata``, ``masked_nodata``, ``georef_status``, ``raster_type``) across four scenarios: integer-nodata, float-NaN-nodata, MinIsWhite, - and the ``mask_nodata=False`` raw-sentinel branch of the + and the ``masked=False`` raw-sentinel branch of the nodata lifecycle. - ``xrspatial/geotiff/tests/release_gates/test_stable_features.py`` (eager / dask full parity section) @@ -384,7 +384,7 @@ Nodata lifecycle - stable - The sentinel survives read and write across every backend; integer sentinels are preserved bit-exact, float sentinels surface as NaN - only when ``mask_nodata=True``. + only when ``masked=True`` (default ``masked=False``). - ``xrspatial/geotiff/tests/read/test_nodata.py``, ``xrspatial/geotiff/tests/write/test_nodata.py`` - `#2341`_ diff --git a/docs/source/user_guide/attrs_contract.rst b/docs/source/user_guide/attrs_contract.rst index 26b2e1df3..35d9f7c52 100644 --- a/docs/source/user_guide/attrs_contract.rst +++ b/docs/source/user_guide/attrs_contract.rst @@ -89,8 +89,8 @@ write. replaced sentinel pixels with NaN (so the buffer is NaN-aware); ``False`` when the array still carries the literal sentinel values, including the case where the array is float dtype - because the caller passed ``mask_nodata=False`` together with - ``dtype=float...``. Only set when ``nodata`` is set; absence + because the caller passed ``masked=False`` (the default) together + with ``dtype=float...``. Only set when ``nodata`` is set; absence means no declared sentinel. See issue #2092. * - ``nodata_pixels_present`` - bool From a59264c9f1b77d399bf430dc26d2c730bc2d1417 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 5 Jun 2026 07:46:24 -0700 Subject: [PATCH 6/6] Address review: unify _is_vrt_source, document edge cases (#2961) --- xrspatial/geotiff/__init__.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 4c8549baa..90b5d7487 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -710,6 +710,13 @@ def open_geotiff(source: str | BinaryIO, *, array; a source with differing per-band values is read with band 0's. Supported on the CPU eager and dask paths; combining it with ``gpu=True`` or a ``.vrt`` source raises ``ValueError``. + Round-trip caveat: the source's ``SCALE`` / ``OFFSET`` tags stay on + ``attrs['gdal_metadata']`` / ``attrs['gdal_metadata_xml']`` after the + read, so writing a ``mask_and_scale=True`` result back out with + ``to_geotiff`` re-embeds them, and reading that file again with + ``mask_and_scale=True`` applies the scale a second time. Drop those + tags (and ``attrs['scale_factor']`` / ``attrs['add_offset']``) before + writing if you need a clean round-trip. parse_coordinates : bool, default True [stable] If True (the default), build ``x`` / ``y`` coordinate arrays from the transform. If False, skip them and return a @@ -876,6 +883,11 @@ def open_geotiff(source: str | BinaryIO, *, # ``read_geotiff_gpu`` (gpu -> on_gpu_failure): passing both the old and # new name is ambiguous and raises, passing the old name alone warns. if mask_nodata is not _MASK_NODATA_DEPRECATED_SENTINEL: + # ``masked`` carries a real default of False, so an explicit + # ``masked=False`` cannot be told apart from the default here; that + # one combination (``masked=False`` + ``mask_nodata=True``) does not + # raise and resolves to the ``mask_nodata`` value. This matches the + # documented stance on ``read_geotiff_gpu``'s gpu/on_gpu_failure pair. if masked is not False: raise TypeError( "open_geotiff: pass either 'masked' or the deprecated " @@ -919,7 +931,7 @@ def open_geotiff(source: str | BinaryIO, *, # combination up front rather than silently ignoring the kwarg -- the # same per-backend rejection contract the dispatcher already applies to # on_gpu_failure / missing_sources / max_cloud_bytes. - _is_vrt_source_early = ( + _is_vrt_source = ( isinstance(source, str) and source.lower().endswith('.vrt')) if mask_and_scale or not parse_coordinates: offending = ( @@ -930,7 +942,7 @@ def open_geotiff(source: str | BinaryIO, *, f"{offending} is not supported with gpu=True; it is " "implemented on the CPU eager and dask paths. Drop gpu=True " "or the kwarg.") - if _is_vrt_source_early: + if _is_vrt_source: raise ValueError( f"{offending} is not supported for .vrt sources; it is " "implemented on the CPU eager and dask paths over .tif " @@ -959,8 +971,8 @@ def open_geotiff(source: str | BinaryIO, *, missing_sources_passed = ( missing_sources is not _MISSING_SOURCES_SENTINEL) - _is_vrt_source = ( - isinstance(source, str) and source.lower().endswith('.vrt')) + # ``_is_vrt_source`` was resolved above for the mask_and_scale / + # parse_coordinates gate. # Gate ``stable_only=True`` BEFORE resolving ``bbox=``. The bbox # resolver reads source geo metadata first (the TIFF path reads a