Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions .claude/sweep-metadata-state.csv

Large diffs are not rendered by default.

166 changes: 123 additions & 43 deletions xrspatial/geotiff/_vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1892,10 +1892,13 @@ def _check_no_mixed_georef(sources_meta: list[dict]) -> None:
``(0, 0)`` as if those identity values were real CRS coordinates.
Refuse rather than emit a mosaic that mislocates the tile.

The all-georeferenced and all-non-georeferenced cases both pass:
write_vrt emits a ``<GeoTransform>`` only when every source is
georeferenced, so the all-non-georeferenced VRT preserves
``georef_status='none'`` on read.
The all-georeferenced and all-non-georeferenced cases both pass
this gate: write_vrt emits a ``<GeoTransform>`` only when every
source is georeferenced, so the all-non-georeferenced VRT preserves
``georef_status='none'`` on read. Placement for multiple
non-georeferenced sources is handled separately in
:func:`write_vrt`: it requires explicit ``dst_offsets`` because the
identity transforms cannot place the tiles (issue #3116).
"""
if not sources_meta:
return
Expand Down Expand Up @@ -1978,7 +1981,8 @@ def _check_no_mixed_nodata(sources_meta: list[dict], *,
def write_vrt(vrt_path: str, source_files: list[str], *,
relative: bool = True,
crs_wkt: str | None = None,
nodata: float | int | None = None) -> str:
nodata: float | int | None = None,
dst_offsets: list[tuple[int, int]] | None = None) -> str:
"""Generate a VRT file that mosaics multiple GeoTIFF tiles.

Do not call this symbol directly from external code. This is the
Expand Down Expand Up @@ -2014,6 +2018,20 @@ def write_vrt(vrt_path: str, source_files: list[str], *,
(e.g. ``65535`` for uint16, ``-9999`` for int32) are accepted
so the surface lines up with the ``nodata`` kwarg on
``to_geotiff`` and ``_write_geotiff_gpu``.
dst_offsets : list of (int, int) or None
[internal-only] Explicit pixel-space placement for
non-georeferenced sources: one ``(x_off, y_off)`` pair per
entry of ``source_files``, giving the source's top-left corner
in the mosaic. Required when more than one non-georeferenced
source is supplied -- such sources carry only the default
identity transform, so their placement cannot be derived from
georeferencing (issue #3116). Rejected when any source is
georeferenced: georeferenced sources place via their
GeoTransform and an explicit override would let the two
disagree silently. Offsets are not checked for overlap or
full coverage; the tiled write path always supplies a
non-overlapping cover, and a direct caller is responsible
for its own layout.

Returns
-------
Expand Down Expand Up @@ -2131,6 +2149,52 @@ def _pixel_size_mismatch(a: float, b: float) -> bool:
_check_no_mixed_georef(sources_meta)
_check_no_mixed_nodata(sources_meta, caller_nodata=nodata)

# Placement policy for non-georeferenced sources (issue #3116).
# Mosaic placement is normally derived from each source's
# GeoTransform, but a non-georeferenced source carries only the
# default identity transform, so every such source maps to the same
# extent. Deriving placement from that stacks all tiles at DstRect
# (0, 0) and shrinks the mosaic to one tile -- a silently corrupt
# index. Callers that know the layout (the tiled ``.vrt`` write
# path in ``_writers/eager.py``) pass explicit pixel offsets via
# ``dst_offsets``; anything else with more than one
# non-georeferenced source is refused, mirroring the
# ``_check_no_mixed_georef`` fail-closed precedent.
all_non_georef = not any(
bool(m.get('has_georef')) for m in sources_meta)
if dst_offsets is not None:
if not all_non_georef:
georeffed = [m['path'] for m in sources_meta
if m.get('has_georef')]
raise ValueError(
f"write_vrt: dst_offsets is only valid when every source "
f"is non-georeferenced; georeferenced sources "
f"{georeffed!r} place via their GeoTransform, and an "
f"explicit pixel offset could silently disagree with it. "
f"Drop dst_offsets or strip the georeferencing.")
if len(dst_offsets) != len(sources_meta):
raise ValueError(
f"write_vrt: dst_offsets has {len(dst_offsets)} entries "
f"for {len(sources_meta)} source file(s); pass one "
f"(x_off, y_off) pair per source.")
for i, pair in enumerate(dst_offsets):
if (not isinstance(pair, (tuple, list)) or len(pair) != 2
or any(isinstance(v, bool)
or not isinstance(v, (int, np.integer))
or v < 0 for v in pair)):
raise ValueError(
f"write_vrt: dst_offsets[{i}] must be a pair of "
f"non-negative ints (x_off, y_off), got {pair!r}.")
elif all_non_georef and len(sources_meta) > 1:
raise ValueError(
f"write_vrt: {len(sources_meta)} non-georeferenced sources "
f"with no dst_offsets. Non-georeferenced TIFFs carry only a "
f"default identity transform, so their mosaic placement "
f"cannot be derived from georeferencing; writing them "
f"without explicit placement would stack every source at "
f"the origin. Pass dst_offsets with one (x_off, y_off) pair "
f"per source, or georeference the sources.")

# Pixel size, sample format, band count, and CRS share the
# documented "all sources must agree with first" contract. These
# remain inline here because they predate the grouped gates above
Expand Down Expand Up @@ -2175,26 +2239,59 @@ def _pixel_size_mismatch(a: float, b: float) -> bool:
f"share the same CRS."
)

# Compute the bounding box of all sources
all_x0, all_y0, all_x1, all_y1 = [], [], [], []
for m in sources_meta:
t = m['transform']
x0 = t.origin_x
y0 = t.origin_y
x1 = x0 + m['width'] * t.pixel_width
y1 = y0 + m['height'] * t.pixel_height
all_x0.append(min(x0, x1))
all_y0.append(min(y0, y1))
all_x1.append(max(x0, x1))
all_y1.append(max(y0, y1))

mosaic_x0 = min(all_x0)
mosaic_y_top = max(all_y1) # top edge (y increases upward in geo)
mosaic_x1 = max(all_x1)
mosaic_y_bottom = min(all_y0)

total_w = int(round((mosaic_x1 - mosaic_x0) / abs(res_x)))
total_h = int(round((mosaic_y_top - mosaic_y_bottom) / abs(res_y)))
if dst_offsets is not None:
# Explicit pixel-space placement (non-georeferenced mosaic).
# The mosaic extent is the union of the placed sources; no
# GeoTransform is emitted (every source is non-georeferenced,
# enforced above), so the geo bbox math is skipped entirely.
placements = [(int(x), int(y)) for x, y in dst_offsets]
total_w = max(x + m['width']
for (x, _y), m in zip(placements, sources_meta))
total_h = max(y + m['height']
for (_x, y), m in zip(placements, sources_meta))
mosaic_x0 = mosaic_y_top = None # no geo origin without georef
else:
# Compute the bounding box of all sources
all_x0, all_y0, all_x1, all_y1 = [], [], [], []
for m in sources_meta:
t = m['transform']
x0 = t.origin_x
y0 = t.origin_y
x1 = x0 + m['width'] * t.pixel_width
y1 = y0 + m['height'] * t.pixel_height
all_x0.append(min(x0, x1))
all_y0.append(min(y0, y1))
all_x1.append(max(x0, x1))
all_y1.append(max(y0, y1))

mosaic_x0 = min(all_x0)
mosaic_y_top = max(all_y1) # top edge (y increases upward in geo)
mosaic_x1 = max(all_x1)
mosaic_y_bottom = min(all_y0)

total_w = int(round((mosaic_x1 - mosaic_x0) / abs(res_x)))
total_h = int(round((mosaic_y_top - mosaic_y_bottom) / abs(res_y)))

# Pixel offset of each source in the virtual raster, derived
# from its GeoTransform. ``origin_y`` is the *top* only for
# north-up rasters (pixel_height < 0); sources with ascending y
# (pixel_height > 0) place origin_y at the bottom, so the top is
# origin_y + height * pixel_height.
placements = []
for m in sources_meta:
t = m['transform']
src_top = max(
t.origin_y,
t.origin_y + m['height'] * t.pixel_height,
)
src_left = min(
t.origin_x,
t.origin_x + m['width'] * t.pixel_width,
)
placements.append((
int(round((src_left - mosaic_x0) / abs(res_x))),
int(round((mosaic_y_top - src_top) / abs(res_y))),
))

# Determine VRT dtype via the central TIFF-to-numpy resolver so the
# VRT header agrees with what the reader will actually decode. The
Expand Down Expand Up @@ -2239,24 +2336,7 @@ def _pixel_size_mismatch(a: float, b: float) -> bool:
if nd is not None:
lines.append(f' <NoDataValue>{_xml_text(nd)}</NoDataValue>')

for m in sources_meta:
t = m['transform']
# Top edge of this source in geo coords -- origin_y is the
# *top* only for north-up rasters (pixel_height < 0). Sources
# with ascending y (pixel_height > 0) place origin_y at the
# bottom, so the top is origin_y + height * pixel_height.
src_top = max(
t.origin_y,
t.origin_y + m['height'] * t.pixel_height,
)
src_left = min(
t.origin_x,
t.origin_x + m['width'] * t.pixel_width,
)
# Pixel offset in the virtual raster
dst_x_off = int(round((src_left - mosaic_x0) / abs(res_x)))
dst_y_off = int(round((mosaic_y_top - src_top) / abs(res_y)))

for m, (dst_x_off, dst_y_off) in zip(sources_meta, placements):
fname = m['path']
rel_attr = '0'
if relative:
Expand Down
14 changes: 13 additions & 1 deletion xrspatial/geotiff/_writers/eager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1376,7 +1376,12 @@ def _cleanup_staging():
# Tiles are written into ``staging_dir``; ``tile_names`` records the
# bare filenames so the final tile paths (under ``tiles_dir``) can be
# rebuilt for the VRT index after the atomic rename below.
# ``tile_offsets`` records each tile's (x_off, y_off) pixel
# placement in lockstep: non-georeferenced tiles carry no
# transform, so the VRT index needs the placement passed
# explicitly or every tile lands at the origin (issue #3116).
tile_names = []
tile_offsets = []
delayed_tasks = []

def _safe_write_tile(*args, **kwargs):
Expand Down Expand Up @@ -1407,6 +1412,7 @@ def _safe_write_tile(*args, **kwargs):
tile_name = f'tile_{ri:0{pad_width}d}_{ci:0{pad_width}d}.tif'
tile_path = os.path.join(staging_dir, tile_name)
tile_names.append(tile_name)
tile_offsets.append((col_offset, row_offset))

# Compute per-tile geo_transform
tile_gt = None
Expand Down Expand Up @@ -1514,7 +1520,13 @@ def _safe_write_tile(*args, **kwargs):
# and validated above; passing it again is idempotent.
from .vrt import _build_vrt
try:
_build_vrt(vrt_path, tile_paths, relative=True, nodata=nodata)
# ``dst_offsets`` carries the tile placement only for
# non-georeferenced input: georeferenced tiles place via their
# per-tile GeoTransform (computed above), and write_vrt rejects
# the kwarg alongside georeferenced sources (issue #3116).
_build_vrt(vrt_path, tile_paths, relative=True, nodata=nodata,
dst_offsets=(
tile_offsets if geo_transform is None else None))
except BaseException:
# The index step failed after the rename. Remove the now-renamed
# tile dir too so a retry is not blocked by the leftover-state
Expand Down
12 changes: 11 additions & 1 deletion xrspatial/geotiff/_writers/vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ def _build_vrt(path: str = _VRT_PATH_MISSING_SENTINEL,
relative: bool = True,
crs: int | str | None = None,
crs_wkt: str | None = _CRS_WKT_DEPRECATED_SENTINEL,
nodata: float | int | None = None) -> str:
nodata: float | int | None = None,
dst_offsets: list[tuple[int, int]] | None = None) -> str:
"""Generate a VRT file that mosaics multiple GeoTIFF tiles.

[internal-only] This helper backs ``to_geotiff``'s ``.vrt`` write
Expand Down Expand Up @@ -92,6 +93,14 @@ def _build_vrt(path: str = _VRT_PATH_MISSING_SENTINEL,
Integer sentinels (e.g. ``65535`` for uint16, ``-9999`` for
int32) are accepted so the surface lines up with the
``nodata`` kwarg on ``to_geotiff`` and ``_write_geotiff_gpu``.
dst_offsets : list of (int, int) or None, optional
[internal-only] Explicit pixel-space placement for
non-georeferenced sources, one ``(x_off, y_off)`` pair per
source file. Forwarded to ``_vrt.write_vrt``; see its docstring
for the contract. ``to_geotiff``'s ``.vrt`` path passes this
for non-georeferenced input so the index places each tile by
its pixel offset instead of the identity transform every such
tile carries (issue #3116).

Returns
-------
Expand Down Expand Up @@ -219,4 +228,5 @@ def _build_vrt(path: str = _VRT_PATH_MISSING_SENTINEL,
relative=relative,
crs_wkt=resolved_wkt,
nodata=nodata,
dst_offsets=dst_offsets,
)
9 changes: 5 additions & 4 deletions xrspatial/geotiff/tests/test_polish.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,14 @@ def test_read_geotiff_dask_handles_vrt_directly(self, tmp_path):
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
a_path = str(tmp_path / 'a_1488.tif')
b_path = str(tmp_path / 'b_1488.tif')
# Two tiles side-by-side via geo-transform attrs would normally be
# generated upstream; for this test we just need both files to
# share a CRS and the writer's default transform.
# The tiles carry no georeferencing, so the writer needs the
# side-by-side layout spelled out: write_vrt now refuses
# multiple non-georeferenced sources without explicit placement
# (issue #3116) instead of stacking them at the origin.
write(arr, a_path, compression='none')
write(arr, b_path, compression='none')
vrt_path = str(tmp_path / 'mosaic_1488.vrt')
wv(vrt_path, [a_path, b_path])
wv(vrt_path, [a_path, b_path], dst_offsets=[(0, 0), (8, 0)])

result = _read_geotiff_dask(vrt_path, chunks=8)
assert result.dims == ('y', 'x')
Expand Down
Loading