diff --git a/docs/source/reference/geotiff.rst b/docs/source/reference/geotiff.rst index eff7a37f9..a8b4ed70b 100644 --- a/docs/source/reference/geotiff.rst +++ b/docs/source/reference/geotiff.rst @@ -113,6 +113,20 @@ the corresponding caveats: * HTTP / range COG (tracked separately; see the byte-budget contract in #2298). +Restricting a read to stable sources +------------------------------------ + +The ``stable_only=True`` opt-in restricts a read to stable-tier sources. +Under it, ``open_geotiff`` rejects advanced-tier sources before any fetch +or decode: a ``.vrt`` mosaic raises ``VRTStableSourcesOnlyError``, and an +HTTP(S) URL or fsspec URI (``s3://``, ``gs://``, ``memory://``) raises +``RemoteStableSourcesOnlyError``. Both subclass +``GeoTIFFAmbiguousMetadataError``. The eager, dask, and GPU paths all run +this check, so a remote source cannot slip through on a non-VRT path. Pass +``allow_experimental_codecs=True`` to opt back into the advanced and +experimental tiers, or drop ``stable_only`` for the default behaviour. +Local-file reads always pass. + Rotated and sheared transforms ============================== diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 0eaf86cf2..ac51f5956 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -73,8 +73,9 @@ from ._errors import (ConflictingCRSError, ConflictingNodataError, DuplicateIFDTagError, GeoTIFFAmbiguousMetadataError, InconsistentGeoKeysError, InvalidCRSCodeError, InvalidIntegerNodataError, MixedBandMetadataError, - NonRepresentableEPSGCRSError, NonUniformCoordsError, RotatedTransformError, - UnknownCRSModelTypeError, UnparseableCRSError, UnsupportedGeoTIFFFeatureError, + NonRepresentableEPSGCRSError, NonUniformCoordsError, + RemoteStableSourcesOnlyError, RotatedTransformError, UnknownCRSModelTypeError, + UnparseableCRSError, UnsupportedGeoTIFFFeatureError, VRTStableSourcesOnlyError) from ._geotags import RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT, GeoTransform # noqa: F401 from ._reader import _MAX_CLOUD_BYTES_SENTINEL, CloudSizeLimitError, UnsafeURLError @@ -116,6 +117,7 @@ 'MixedBandMetadataError', 'NonRepresentableEPSGCRSError', 'NonUniformCoordsError', + 'RemoteStableSourcesOnlyError', 'RotatedTransformError', 'SUPPORTED_FEATURES', 'UnknownCRSModelTypeError', @@ -908,6 +910,17 @@ def open_geotiff(source: str | BinaryIO, *, # fired inside ``_validate_dispatch_kwargs`` above; the non-VRT branches # below run with a string source or an eager file-like. + # The VRT branch above gates ``stable_only=True`` for ``.vrt`` sources. + # The remaining non-VRT dispatch (GPU / dask / eager) can still route to + # the advanced-tier HTTP / fsspec readers, so apply the matching gate + # here before any backend fetches or decodes pixels. + from ._validation import _validate_stable_only_remote + _validate_stable_only_remote( + source, + stable_only=stable_only, + allow_experimental_codecs=allow_experimental_codecs, + ) + # GPU path if gpu: gpu_kwargs = {} diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index c4958e45b..a87277efc 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -223,6 +223,18 @@ def read_geotiff_dask(source: str, *, **vrt_kwargs, ) + # ``open_geotiff`` gates ``stable_only=True`` for remote sources before + # dispatching here, but ``read_geotiff_dask`` is also a direct entry + # point. Apply the same gate so a direct caller cannot read an + # advanced-tier HTTP / fsspec source under ``stable_only=True``. The VRT + # branch above already forwards ``stable_only`` to ``read_vrt``. + from .._validation import _validate_stable_only_remote + _validate_stable_only_remote( + source, + stable_only=stable_only, + allow_experimental_codecs=allow_experimental_codecs, + ) + # HTTP COG sources used to fire one IFD/header GET per chunk # task. Parse metadata once here so every delayed task can reuse it. # The same prefetch path also covers fsspec URIs (s3://, gs://, ...); diff --git a/xrspatial/geotiff/_errors.py b/xrspatial/geotiff/_errors.py index 72c70e59a..b76e77496 100644 --- a/xrspatial/geotiff/_errors.py +++ b/xrspatial/geotiff/_errors.py @@ -211,6 +211,27 @@ class VRTStableSourcesOnlyError(GeoTIFFAmbiguousMetadataError): """ +class RemoteStableSourcesOnlyError(GeoTIFFAmbiguousMetadataError): + """HTTP / fsspec source opened under ``stable_only=True``. + + Raised when a caller reads an ``http(s)://`` URL or an fsspec URI + (``s3://``, ``gs://``, ``memory://``, ...) with ``stable_only=True``. + The remote readers (``reader.http`` and ``reader.fsspec``) sit at the + ``advanced`` tier in :data:`xrspatial.geotiff.SUPPORTED_FEATURES`, so + a request for stable-only sources cannot be served from a remote + source without an explicit opt-in. The sibling of + :class:`VRTStableSourcesOnlyError` for the eager and dask non-VRT + read paths; both subclass :class:`GeoTIFFAmbiguousMetadataError` so a + caller can catch the whole stable-only family at once. + + Pass ``stable_only=False`` (the default) to keep the legacy + behaviour, or pass ``allow_experimental_codecs=True`` to opt into + the broader tier set explicitly. See the release contract document + at ``docs/source/reference/release_gate_geotiff.rst`` for the full + rationale. + """ + + class UnknownCRSModelTypeError(GeoTIFFAmbiguousMetadataError): """Can't classify an EPSG as geographic or projected on write. @@ -306,6 +327,7 @@ class UnsupportedGeoTIFFFeatureError(ValueError): "MixedBandMetadataError", "NonRepresentableEPSGCRSError", "NonUniformCoordsError", + "RemoteStableSourcesOnlyError", "RotatedTransformError", "UnknownCRSModelTypeError", "UnparseableCRSError", diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index fe36c7907..6812e30ce 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -24,8 +24,8 @@ from ._coords import _BAND_DIM_NAMES from ._errors import (ConflictingCRSError, ConflictingNodataError, InconsistentGeoKeysError, InvalidIntegerNodataError, MixedBandMetadataError, NonUniformCoordsError, - RotatedTransformError, UnparseableCRSError, VRTStableSourcesOnlyError, - _distinct_per_band_nodatavals_msg) + RemoteStableSourcesOnlyError, RotatedTransformError, UnparseableCRSError, + VRTStableSourcesOnlyError, _distinct_per_band_nodatavals_msg) from ._runtime import (_MISSING_SOURCES_SENTINEL, _ON_GPU_FAILURE_SENTINEL, _TIME_DIM_NAMES, _X_DIM_NAMES, _Y_DIM_NAMES) @@ -742,6 +742,72 @@ def _validate_stable_only_vrt( ) +def _validate_stable_only_remote( + source, + *, + stable_only: bool, + allow_experimental_codecs: bool = False, +) -> None: + """Reject an HTTP / fsspec source when stable-only sources are asked. + + Companion to :func:`_validate_stable_only_vrt` for the non-VRT read + paths. ``reader.http`` and ``reader.fsspec`` sit at the ``advanced`` + tier in :data:`xrspatial.geotiff.SUPPORTED_FEATURES`, so a caller who + asks for stable-only sources via ``stable_only=True`` must not be + served from a remote source. The VRT dispatcher already gates this at + graph-build / eager-read setup; the eager and dask non-VRT paths route + around that gate (the VRT validator only fires for ``.vrt`` paths), so + HTTP / fsspec sources used to slip through ``stable_only=True``. + Reject up front, before any range GET or decode work. + + Parameters + ---------- + source : str or path-like or file-like + The read source. Only ``http(s)://`` URLs and fsspec URIs raise; + local-file paths and eager file-like buffers pass through, since + ``reader.local_file`` is a stable-tier reader. + stable_only : bool + Caller's opt-in for stable-only sources. ``False`` is a no-op; + ``True`` raises :class:`RemoteStableSourcesOnlyError` for a remote + source unless ``allow_experimental_codecs=True``. + allow_experimental_codecs : bool, default False + Documented unlock. When set, the gate becomes a no-op so the + caller can opt into the advanced / experimental tiers explicitly, + mirroring :func:`_validate_stable_only_vrt`. + + Raises + ------ + RemoteStableSourcesOnlyError + When ``stable_only=True``, the caller did not pass + ``allow_experimental_codecs=True``, and ``source`` is an HTTP(S) + URL or an fsspec URI. The message names the offending source, + both flags, and cites the release-contract document. + """ + if not stable_only: + return + if allow_experimental_codecs: + return + # Local imports: the validation layer avoids eager-importing the + # sources / reader scheme helpers at module load, matching the dask + # backend so the package imports without the cloud-read deps in + # environments that only consume local-file reads. + from ._reader import _is_fsspec_uri + from ._sources import _is_http_source + if not (_is_http_source(source) or _is_fsspec_uri(source)): + return + raise RemoteStableSourcesOnlyError( + f"Remote source '{source}' cannot be opened under " + f"stable_only=True. The HTTP and fsspec readers (``reader.http`` " + f"/ ``reader.fsspec``) sit at the advanced tier in " + f"SUPPORTED_FEATURES, so the stable-only request cannot be served " + f"from a remote source. Pass allow_experimental_codecs=True to " + f"opt in to the advanced / experimental tiers, or drop " + f"stable_only=True to keep the default behaviour. See " + f"docs/source/reference/release_gate_geotiff.rst for the " + f"release contract and epic #2342 for the tracking issue." + ) + + def _validate_no_rotated_affine(attrs, *, drop_rotation: bool, entry_point: str = "to_geotiff") -> None: """Refuse writes that would silently drop ``attrs['rotated_affine']``. diff --git a/xrspatial/geotiff/tests/release_gates/test_features.py b/xrspatial/geotiff/tests/release_gates/test_features.py index 2887c3ccb..5064125ed 100644 --- a/xrspatial/geotiff/tests/release_gates/test_features.py +++ b/xrspatial/geotiff/tests/release_gates/test_features.py @@ -2851,6 +2851,11 @@ def test_all_lists_supported_functions(self): # so the request cannot be served without naming the # broader-tier opt-in. 'VRTStableSourcesOnlyError', + # Sibling of VRTStableSourcesOnlyError for the non-VRT read + # paths: a caller opening an HTTP / fsspec source under + # ``stable_only=True`` is rejected because those readers are + # advanced-tier. + 'RemoteStableSourcesOnlyError', 'GeoTIFFFallbackWarning', 'UnsafeURLError', # Canonical georef_status constants. Exposed so downstream diff --git a/xrspatial/geotiff/tests/test_stable_only_remote_2821.py b/xrspatial/geotiff/tests/test_stable_only_remote_2821.py new file mode 100644 index 000000000..32ba6ef29 --- /dev/null +++ b/xrspatial/geotiff/tests/test_stable_only_remote_2821.py @@ -0,0 +1,136 @@ +"""``stable_only=True`` must reject advanced-tier HTTP / fsspec sources. + +Regression coverage for issue #2821. ``reader.http`` and +``reader.fsspec`` sit at the ``advanced`` tier in +:data:`xrspatial.geotiff.SUPPORTED_FEATURES`, so a caller who asks for +stable-only sources via ``stable_only=True`` must not be served from a +remote source. Before the fix the gate only fired on the VRT dispatch: +the eager non-VRT path called ``read_to_array`` directly and the dask +path only forwarded ``stable_only`` to ``read_vrt`` for ``.vrt`` paths, +so an HTTP / fsspec source slipped through unchecked. + +The tests push a small valid (stable-codec, local-style) GeoTIFF into +the fsspec ``memory://`` filesystem, which ships with fsspec and runs in +plain CI without credentials. ``open_geotiff('memory://...')`` walks the +same ``_is_fsspec_uri`` routing an ``s3://`` / ``gs://`` URL would, so +the gate fires before any read. HTTP URLs are checked against the local +file source as the negative control. +""" +from __future__ import annotations + +import numpy as np +import pytest + +from xrspatial.geotiff import (GeoTIFFAmbiguousMetadataError, RemoteStableSourcesOnlyError, + open_geotiff, read_geotiff_dask) +from xrspatial.geotiff.tests._helpers.tiff_builders import make_minimal_tiff + +fsspec = pytest.importorskip("fsspec") + + +_MEMORY_URL = "memory:///stable_only_2821/sample.tif" + + +@pytest.fixture +def memory_tiff_url(): + """Push a stable-codec GeoTIFF into fsspec memory; yield its URL. + + The memory filesystem is a process-global singleton, so wipe the + sample before and after each test to avoid cross-test leakage. + """ + payload = make_minimal_tiff( + 4, 4, np.dtype("float32"), + geo_transform=(-120.0, 45.0, 0.001, -0.001), + epsg=4326, + ) + fs = fsspec.filesystem("memory") + with fs.open(_MEMORY_URL, "wb") as handle: + handle.write(payload) + try: + yield _MEMORY_URL + finally: + if fs.exists(_MEMORY_URL): + fs.rm(_MEMORY_URL) + + +@pytest.fixture +def local_tiff_path(tmp_path): + """Write a stable-codec GeoTIFF to a local file; yield its path.""" + payload = make_minimal_tiff( + 4, 4, np.dtype("float32"), + geo_transform=(-120.0, 45.0, 0.001, -0.001), + epsg=4326, + ) + path = tmp_path / "stable_only_2821_local.tif" + path.write_bytes(payload) + return str(path) + + +def _assert_remote_stable_error(excinfo) -> None: + """The rejection names the unlocking opt-in and cites the contract.""" + msg = str(excinfo.value) + assert "stable_only" in msg or "allow_experimental_codecs" in msg, msg + assert "release_gate_geotiff.rst" in msg or "#2342" in msg, msg + + +def test_eager_fsspec_source_rejected_under_stable_only(memory_tiff_url): + """Eager non-VRT read of an fsspec source rejects ``stable_only=True``.""" + with pytest.raises(RemoteStableSourcesOnlyError) as excinfo: + open_geotiff(memory_tiff_url, stable_only=True) + _assert_remote_stable_error(excinfo) + + +def test_dask_fsspec_source_rejected_under_stable_only(memory_tiff_url): + """Dask non-VRT read of an fsspec source rejects ``stable_only=True``.""" + with pytest.raises(RemoteStableSourcesOnlyError) as excinfo: + open_geotiff(memory_tiff_url, stable_only=True, chunks=2) + _assert_remote_stable_error(excinfo) + + +def test_dask_direct_fsspec_source_rejected_under_stable_only(memory_tiff_url): + """The direct ``read_geotiff_dask`` entry point gates remote sources too.""" + with pytest.raises(RemoteStableSourcesOnlyError) as excinfo: + read_geotiff_dask(memory_tiff_url, chunks=2, stable_only=True) + _assert_remote_stable_error(excinfo) + + +def test_remote_stable_error_is_ambiguous_metadata_subclass(): + """The error stays catchable as the shared stable-only family.""" + assert issubclass(RemoteStableSourcesOnlyError, GeoTIFFAmbiguousMetadataError) + + +def test_http_source_rejected_under_stable_only(): + """An ``http://`` source is gated before any network fetch. + + The gate runs ahead of the SSRF / fetch machinery, so no real + request is made; the URL never resolves. + """ + with pytest.raises(RemoteStableSourcesOnlyError) as excinfo: + open_geotiff("http://example.invalid/sample.tif", stable_only=True) + _assert_remote_stable_error(excinfo) + + +def test_fsspec_source_allowed_with_experimental_optin(memory_tiff_url): + """``allow_experimental_codecs=True`` unlocks the advanced tier.""" + result = open_geotiff( + memory_tiff_url, stable_only=True, allow_experimental_codecs=True, + ) + assert result.shape == (4, 4) + + +def test_fsspec_source_allowed_without_stable_only(memory_tiff_url): + """The default (``stable_only=False``) keeps the legacy behaviour.""" + result = open_geotiff(memory_tiff_url) + assert result.shape == (4, 4) + + +def test_local_eager_source_succeeds_under_stable_only(local_tiff_path): + """A plain local-file eager read still works under ``stable_only=True``.""" + result = open_geotiff(local_tiff_path, stable_only=True) + assert result.shape == (4, 4) + + +def test_local_dask_source_succeeds_under_stable_only(local_tiff_path): + """A plain local-file dask read still works under ``stable_only=True``.""" + result = open_geotiff(local_tiff_path, stable_only=True, chunks=2) + assert result.shape == (4, 4)