Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docs/source/reference/geotiff.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,20 @@ the corresponding caveats:
* HTTP / range COG (tracked separately; see the byte-budget contract in
#2298).

Restricting a read to stable sources
------------------------------------

The ``stable_only=True`` opt-in restricts a read to stable-tier sources.
Under it, ``open_geotiff`` rejects advanced-tier sources before any fetch
or decode: a ``.vrt`` mosaic raises ``VRTStableSourcesOnlyError``, and an
HTTP(S) URL or fsspec URI (``s3://``, ``gs://``, ``memory://``) raises
``RemoteStableSourcesOnlyError``. Both subclass
``GeoTIFFAmbiguousMetadataError``. The eager, dask, and GPU paths all run
this check, so a remote source cannot slip through on a non-VRT path. Pass
``allow_experimental_codecs=True`` to opt back into the advanced and
experimental tiers, or drop ``stable_only`` for the default behaviour.
Local-file reads always pass.

Rotated and sheared transforms
==============================

Expand Down
17 changes: 15 additions & 2 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@
from ._errors import (ConflictingCRSError, ConflictingNodataError, DuplicateIFDTagError,
GeoTIFFAmbiguousMetadataError, InconsistentGeoKeysError, InvalidCRSCodeError,
InvalidIntegerNodataError, MixedBandMetadataError,
NonRepresentableEPSGCRSError, NonUniformCoordsError, RotatedTransformError,
UnknownCRSModelTypeError, UnparseableCRSError, UnsupportedGeoTIFFFeatureError,
NonRepresentableEPSGCRSError, NonUniformCoordsError,
RemoteStableSourcesOnlyError, RotatedTransformError, UnknownCRSModelTypeError,
UnparseableCRSError, UnsupportedGeoTIFFFeatureError,
VRTStableSourcesOnlyError)
from ._geotags import RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT, GeoTransform # noqa: F401
from ._reader import _MAX_CLOUD_BYTES_SENTINEL, CloudSizeLimitError, UnsafeURLError
Expand Down Expand Up @@ -116,6 +117,7 @@
'MixedBandMetadataError',
'NonRepresentableEPSGCRSError',
'NonUniformCoordsError',
'RemoteStableSourcesOnlyError',
'RotatedTransformError',
'SUPPORTED_FEATURES',
'UnknownCRSModelTypeError',
Expand Down Expand Up @@ -908,6 +910,17 @@ def open_geotiff(source: str | BinaryIO, *,
# fired inside ``_validate_dispatch_kwargs`` above; the non-VRT branches
# below run with a string source or an eager file-like.

# The VRT branch above gates ``stable_only=True`` for ``.vrt`` sources.
# The remaining non-VRT dispatch (GPU / dask / eager) can still route to
# the advanced-tier HTTP / fsspec readers, so apply the matching gate
# here before any backend fetches or decodes pixels.
from ._validation import _validate_stable_only_remote
_validate_stable_only_remote(
source,
stable_only=stable_only,
allow_experimental_codecs=allow_experimental_codecs,
)

# GPU path
if gpu:
gpu_kwargs = {}
Expand Down
12 changes: 12 additions & 0 deletions xrspatial/geotiff/_backends/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,18 @@ def read_geotiff_dask(source: str, *,
**vrt_kwargs,
)

# ``open_geotiff`` gates ``stable_only=True`` for remote sources before
# dispatching here, but ``read_geotiff_dask`` is also a direct entry
# point. Apply the same gate so a direct caller cannot read an
# advanced-tier HTTP / fsspec source under ``stable_only=True``. The VRT
# branch above already forwards ``stable_only`` to ``read_vrt``.
from .._validation import _validate_stable_only_remote
_validate_stable_only_remote(
source,
stable_only=stable_only,
allow_experimental_codecs=allow_experimental_codecs,
)

# HTTP COG sources used to fire one IFD/header GET per chunk
# task. Parse metadata once here so every delayed task can reuse it.
# The same prefetch path also covers fsspec URIs (s3://, gs://, ...);
Expand Down
22 changes: 22 additions & 0 deletions xrspatial/geotiff/_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,27 @@ class VRTStableSourcesOnlyError(GeoTIFFAmbiguousMetadataError):
"""


class RemoteStableSourcesOnlyError(GeoTIFFAmbiguousMetadataError):
"""HTTP / fsspec source opened under ``stable_only=True``.

Raised when a caller reads an ``http(s)://`` URL or an fsspec URI
(``s3://``, ``gs://``, ``memory://``, ...) with ``stable_only=True``.
The remote readers (``reader.http`` and ``reader.fsspec``) sit at the
``advanced`` tier in :data:`xrspatial.geotiff.SUPPORTED_FEATURES`, so
a request for stable-only sources cannot be served from a remote
source without an explicit opt-in. The sibling of
:class:`VRTStableSourcesOnlyError` for the eager and dask non-VRT
read paths; both subclass :class:`GeoTIFFAmbiguousMetadataError` so a
caller can catch the whole stable-only family at once.

Pass ``stable_only=False`` (the default) to keep the legacy
behaviour, or pass ``allow_experimental_codecs=True`` to opt into
the broader tier set explicitly. See the release contract document
at ``docs/source/reference/release_gate_geotiff.rst`` for the full
rationale.
"""


class UnknownCRSModelTypeError(GeoTIFFAmbiguousMetadataError):
"""Can't classify an EPSG as geographic or projected on write.

Expand Down Expand Up @@ -306,6 +327,7 @@ class UnsupportedGeoTIFFFeatureError(ValueError):
"MixedBandMetadataError",
"NonRepresentableEPSGCRSError",
"NonUniformCoordsError",
"RemoteStableSourcesOnlyError",
"RotatedTransformError",
"UnknownCRSModelTypeError",
"UnparseableCRSError",
Expand Down
70 changes: 68 additions & 2 deletions xrspatial/geotiff/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
from ._coords import _BAND_DIM_NAMES
from ._errors import (ConflictingCRSError, ConflictingNodataError, InconsistentGeoKeysError,
InvalidIntegerNodataError, MixedBandMetadataError, NonUniformCoordsError,
RotatedTransformError, UnparseableCRSError, VRTStableSourcesOnlyError,
_distinct_per_band_nodatavals_msg)
RemoteStableSourcesOnlyError, RotatedTransformError, UnparseableCRSError,
VRTStableSourcesOnlyError, _distinct_per_band_nodatavals_msg)
from ._runtime import (_MISSING_SOURCES_SENTINEL, _ON_GPU_FAILURE_SENTINEL, _TIME_DIM_NAMES,
_X_DIM_NAMES, _Y_DIM_NAMES)

Expand Down Expand Up @@ -742,6 +742,72 @@ def _validate_stable_only_vrt(
)


def _validate_stable_only_remote(
source,
*,
stable_only: bool,
allow_experimental_codecs: bool = False,
) -> None:
"""Reject an HTTP / fsspec source when stable-only sources are asked.

Companion to :func:`_validate_stable_only_vrt` for the non-VRT read
paths. ``reader.http`` and ``reader.fsspec`` sit at the ``advanced``
tier in :data:`xrspatial.geotiff.SUPPORTED_FEATURES`, so a caller who
asks for stable-only sources via ``stable_only=True`` must not be
served from a remote source. The VRT dispatcher already gates this at
graph-build / eager-read setup; the eager and dask non-VRT paths route
around that gate (the VRT validator only fires for ``.vrt`` paths), so
HTTP / fsspec sources used to slip through ``stable_only=True``.
Reject up front, before any range GET or decode work.

Parameters
----------
source : str or path-like or file-like
The read source. Only ``http(s)://`` URLs and fsspec URIs raise;
local-file paths and eager file-like buffers pass through, since
``reader.local_file`` is a stable-tier reader.
stable_only : bool
Caller's opt-in for stable-only sources. ``False`` is a no-op;
``True`` raises :class:`RemoteStableSourcesOnlyError` for a remote
source unless ``allow_experimental_codecs=True``.
allow_experimental_codecs : bool, default False
Documented unlock. When set, the gate becomes a no-op so the
caller can opt into the advanced / experimental tiers explicitly,
mirroring :func:`_validate_stable_only_vrt`.

Raises
------
RemoteStableSourcesOnlyError
When ``stable_only=True``, the caller did not pass
``allow_experimental_codecs=True``, and ``source`` is an HTTP(S)
URL or an fsspec URI. The message names the offending source,
both flags, and cites the release-contract document.
"""
if not stable_only:
return
if allow_experimental_codecs:
return
# Local imports: the validation layer avoids eager-importing the
# sources / reader scheme helpers at module load, matching the dask
# backend so the package imports without the cloud-read deps in
# environments that only consume local-file reads.
from ._reader import _is_fsspec_uri
from ._sources import _is_http_source
if not (_is_http_source(source) or _is_fsspec_uri(source)):
return
raise RemoteStableSourcesOnlyError(
f"Remote source '{source}' cannot be opened under "
f"stable_only=True. The HTTP and fsspec readers (``reader.http`` "
f"/ ``reader.fsspec``) sit at the advanced tier in "
f"SUPPORTED_FEATURES, so the stable-only request cannot be served "
f"from a remote source. Pass allow_experimental_codecs=True to "
f"opt in to the advanced / experimental tiers, or drop "
f"stable_only=True to keep the default behaviour. See "
f"docs/source/reference/release_gate_geotiff.rst for the "
f"release contract and epic #2342 for the tracking issue."
)


def _validate_no_rotated_affine(attrs, *, drop_rotation: bool,
entry_point: str = "to_geotiff") -> None:
"""Refuse writes that would silently drop ``attrs['rotated_affine']``.
Expand Down
5 changes: 5 additions & 0 deletions xrspatial/geotiff/tests/release_gates/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -2851,6 +2851,11 @@ def test_all_lists_supported_functions(self):
# so the request cannot be served without naming the
# broader-tier opt-in.
'VRTStableSourcesOnlyError',
# Sibling of VRTStableSourcesOnlyError for the non-VRT read
# paths: a caller opening an HTTP / fsspec source under
# ``stable_only=True`` is rejected because those readers are
# advanced-tier.
'RemoteStableSourcesOnlyError',
'GeoTIFFFallbackWarning',
'UnsafeURLError',
# Canonical georef_status constants. Exposed so downstream
Expand Down
136 changes: 136 additions & 0 deletions xrspatial/geotiff/tests/test_stable_only_remote_2821.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
"""``stable_only=True`` must reject advanced-tier HTTP / fsspec sources.

Regression coverage for issue #2821. ``reader.http`` and
``reader.fsspec`` sit at the ``advanced`` tier in
:data:`xrspatial.geotiff.SUPPORTED_FEATURES`, so a caller who asks for
stable-only sources via ``stable_only=True`` must not be served from a
remote source. Before the fix the gate only fired on the VRT dispatch:
the eager non-VRT path called ``read_to_array`` directly and the dask
path only forwarded ``stable_only`` to ``read_vrt`` for ``.vrt`` paths,
so an HTTP / fsspec source slipped through unchecked.

The tests push a small valid (stable-codec, local-style) GeoTIFF into
the fsspec ``memory://`` filesystem, which ships with fsspec and runs in
plain CI without credentials. ``open_geotiff('memory://...')`` walks the
same ``_is_fsspec_uri`` routing an ``s3://`` / ``gs://`` URL would, so
the gate fires before any read. HTTP URLs are checked against the local
file source as the negative control.
"""
from __future__ import annotations

import numpy as np
import pytest

from xrspatial.geotiff import (GeoTIFFAmbiguousMetadataError, RemoteStableSourcesOnlyError,
open_geotiff, read_geotiff_dask)
from xrspatial.geotiff.tests._helpers.tiff_builders import make_minimal_tiff

fsspec = pytest.importorskip("fsspec")


_MEMORY_URL = "memory:///stable_only_2821/sample.tif"


@pytest.fixture
def memory_tiff_url():
"""Push a stable-codec GeoTIFF into fsspec memory; yield its URL.

The memory filesystem is a process-global singleton, so wipe the
sample before and after each test to avoid cross-test leakage.
"""
payload = make_minimal_tiff(
4, 4, np.dtype("float32"),
geo_transform=(-120.0, 45.0, 0.001, -0.001),
epsg=4326,
)
fs = fsspec.filesystem("memory")
with fs.open(_MEMORY_URL, "wb") as handle:
handle.write(payload)
try:
yield _MEMORY_URL
finally:
if fs.exists(_MEMORY_URL):
fs.rm(_MEMORY_URL)


@pytest.fixture
def local_tiff_path(tmp_path):
"""Write a stable-codec GeoTIFF to a local file; yield its path."""
payload = make_minimal_tiff(
4, 4, np.dtype("float32"),
geo_transform=(-120.0, 45.0, 0.001, -0.001),
epsg=4326,
)
path = tmp_path / "stable_only_2821_local.tif"
path.write_bytes(payload)
return str(path)


def _assert_remote_stable_error(excinfo) -> None:
"""The rejection names the unlocking opt-in and cites the contract."""
msg = str(excinfo.value)
assert "stable_only" in msg or "allow_experimental_codecs" in msg, msg
assert "release_gate_geotiff.rst" in msg or "#2342" in msg, msg


def test_eager_fsspec_source_rejected_under_stable_only(memory_tiff_url):
"""Eager non-VRT read of an fsspec source rejects ``stable_only=True``."""
with pytest.raises(RemoteStableSourcesOnlyError) as excinfo:
open_geotiff(memory_tiff_url, stable_only=True)
_assert_remote_stable_error(excinfo)


def test_dask_fsspec_source_rejected_under_stable_only(memory_tiff_url):
"""Dask non-VRT read of an fsspec source rejects ``stable_only=True``."""
with pytest.raises(RemoteStableSourcesOnlyError) as excinfo:
open_geotiff(memory_tiff_url, stable_only=True, chunks=2)
_assert_remote_stable_error(excinfo)


def test_dask_direct_fsspec_source_rejected_under_stable_only(memory_tiff_url):
"""The direct ``read_geotiff_dask`` entry point gates remote sources too."""
with pytest.raises(RemoteStableSourcesOnlyError) as excinfo:
read_geotiff_dask(memory_tiff_url, chunks=2, stable_only=True)
_assert_remote_stable_error(excinfo)


def test_remote_stable_error_is_ambiguous_metadata_subclass():
"""The error stays catchable as the shared stable-only family."""
assert issubclass(RemoteStableSourcesOnlyError, GeoTIFFAmbiguousMetadataError)


def test_http_source_rejected_under_stable_only():
"""An ``http://`` source is gated before any network fetch.

The gate runs ahead of the SSRF / fetch machinery, so no real
request is made; the URL never resolves.
"""
with pytest.raises(RemoteStableSourcesOnlyError) as excinfo:
open_geotiff("http://example.invalid/sample.tif", stable_only=True)
_assert_remote_stable_error(excinfo)


def test_fsspec_source_allowed_with_experimental_optin(memory_tiff_url):
"""``allow_experimental_codecs=True`` unlocks the advanced tier."""
result = open_geotiff(
memory_tiff_url, stable_only=True, allow_experimental_codecs=True,
)
assert result.shape == (4, 4)


def test_fsspec_source_allowed_without_stable_only(memory_tiff_url):
"""The default (``stable_only=False``) keeps the legacy behaviour."""
result = open_geotiff(memory_tiff_url)
assert result.shape == (4, 4)


def test_local_eager_source_succeeds_under_stable_only(local_tiff_path):
"""A plain local-file eager read still works under ``stable_only=True``."""
result = open_geotiff(local_tiff_path, stable_only=True)
assert result.shape == (4, 4)


def test_local_dask_source_succeeds_under_stable_only(local_tiff_path):
"""A plain local-file dask read still works under ``stable_only=True``."""
result = open_geotiff(local_tiff_path, stable_only=True, chunks=2)
assert result.shape == (4, 4)
Loading