Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions xrspatial/geotiff/_backends/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,13 @@ def read_geotiff_dask(source: str, *,
# and ``_CloudSource`` satisfies that contract. Going through it
# bounds metadata reads to ``MAX_HTTP_HEADER_BYTES`` instead of
# fetching the whole remote object up front. See PR #1755 review.
from .._reader import _is_fsspec_uri, _is_http_url
is_http = _is_http_url(source)
# Local imports: backend modules avoid eager-importing the reader /
# sources layer at module load so the package can be imported without
# urllib3 in environments that only consume the dask path.
# Issues #2323 / #2332.
from .._reader import _is_fsspec_uri
from .._sources import _is_http_source
is_http = _is_http_source(source)
is_fsspec = isinstance(source, str) and _is_fsspec_uri(source)
http_meta = None
http_meta_key = None
Expand Down Expand Up @@ -573,8 +578,8 @@ def _read(http_meta):
# fsspec-addressable remotes (s3://, gs://, az://, memory://, ...).
# Both source classes expose ``read_range``, which is all
# ``_fetch_decode_cog_http_tiles`` needs.
from .._reader import _is_http_url as _ihu
_is_http_src = _ihu(source)
from .._sources import _is_http_source as _ihs
_is_http_src = _ihs(source)
_is_fsspec_src = False
if http_meta is not None and isinstance(source, str) and \
not _is_http_src:
Expand Down
10 changes: 5 additions & 5 deletions xrspatial/geotiff/_backends/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ def read_geotiff_gpu(source: str, *,
from .._header import parse_all_ifds, parse_header, select_overview_ifd, validate_tile_layout
from .._reader import (MAX_PIXELS_DEFAULT, _check_dimensions, _FileSource, _is_fsspec_uri,
_max_tile_bytes_from_env, _resolve_masked_fill)
from .._sources import _is_http_source

# ``source`` is already coerced above (before the dispatch
# validator); no need to re-coerce here.
Expand Down Expand Up @@ -346,9 +347,8 @@ def read_geotiff_gpu(source: str, *,
# whole image either way for the eager path; the trade-off is a CPU
# decode instead of nvCOMP-on-GPU. Callers who want bounded GPU
# memory should pass ``chunks=...``.
from .._reader import _is_http_url
if isinstance(source, str) and (
_is_http_url(source)
_is_http_source(source)
or _is_fsspec_uri(source)):
return _read_geotiff_gpu_eager_via_cpu(
source, dtype=dtype, window=window,
Expand Down Expand Up @@ -1075,9 +1075,9 @@ def _gds_chunk_path_available(source, ifd, has_sparse_tile, orientation):
# import failure would silently let an HTTP URL into the kvikio
# branch (which opens the path as a local file and panics). The
# canonical case-insensitive helper is a sibling module, so the
# import is safe at module load time (#2323).
from .._sources import _is_http_url
if _is_http_url(source):
# import is safe at module load time. Issues #2323 / #2332.
from .._sources import _is_http_source
if _is_http_source(source):
return False
try:
from .._reader import _is_fsspec_uri
Expand Down
5 changes: 3 additions & 2 deletions xrspatial/geotiff/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@
_BytesIOSource, _CloudSource, _coerce_path, _FileSource, _get_http_pool,
_get_pinned_conn_classes, _http_allow_private_hosts, _http_connect_timeout,
_http_read_timeout, _http_timeout_from_env, _HTTPSource, _ip_is_private,
_is_file_like, _is_fsspec_uri, _is_http_url, _make_pinned_pool,
_is_file_like, _is_fsspec_uri, _is_http_source, _is_http_url,
_make_pinned_pool,
_max_coalesced_range_bytes_from_env, _max_tile_bytes_from_env, _mmap_cache,
_mmap_cache_size_from_env, _MmapCache, _open_source,
_resolve_max_cloud_bytes, _validate_http_url, coalesce_ranges,
Expand Down Expand Up @@ -142,7 +143,7 @@ def _read_to_array(source, *, window=None, overview_level: int | None = None,
(np.ndarray, GeoInfo) tuple
"""
source = _coerce_path(source)
if _is_http_url(source):
if _is_http_source(source):
return _read_cog_http(source, overview_level=overview_level, band=band,
max_pixels=max_pixels, window=window,
allow_rotated=allow_rotated)
Expand Down
14 changes: 8 additions & 6 deletions xrspatial/geotiff/_sidecar.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@
# ``_reader`` imports ``_sidecar`` lazily (inside functions), so this
# top-level import does not form a cycle at module load time.
from ._reader import _is_fsspec_uri
# Canonical case-insensitive http(s) check (#2323). Reused via the
# wrapper ``_is_http_url`` below so existing imports keep working.
from ._sources import _is_http_url as _canonical_is_http_url
from ._sources import _is_http_source

#: Type of the bytes-like buffer a sidecar carries: an mmap for local
#: files, bytes for HTTP / fsspec downloads. Narrowed from ``object``
Expand All @@ -46,9 +44,13 @@ class SidecarOverviews(NamedTuple):


def _is_http_url(source: str) -> bool:
# Delegate to the canonical case-insensitive check so uppercase
# ``HTTP://`` URLs cannot dodge SSRF validation (issue #2323).
return _canonical_is_http_url(source)
"""Case-insensitive HTTP(S) scheme test for sidecar routing.

Delegates to :func:`xrspatial.geotiff._sources._is_http_source` so
the SSRF-relevant routing decision matches the rest of the package
(issues #2323 / #2332).
"""
return _is_http_source(source)


def find_sidecar(source) -> str | None:
Expand Down
45 changes: 29 additions & 16 deletions xrspatial/geotiff/_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,32 +1452,45 @@ def close(self):
_CLOUD_SCHEMES = ('s3://', 'gs://', 'az://', 'abfs://')


def _is_http_url(path) -> bool:
"""Return True if *path* is an ``http://`` or ``https://`` URL.

Case-insensitive: URL schemes are case-insensitive per RFC 3986, so an
uppercase ``HTTP://`` or mixed-case ``Http://`` must dispatch to the
SSRF-validating :class:`_HTTPSource`, not to the fsspec branch. See
issue #2323.
def _is_http_source(source) -> bool:
"""Return True if ``source`` is an HTTP(S) URL, case-insensitively.

Centralized so every routing call site in ``xrspatial/geotiff/``
classifies the scheme the same way. Before this helper existed,
each call site did ``source.startswith(('http://', 'https://'))``,
which is case-sensitive and let ``HTTP://example.internal/...``
(uppercase) slip past :class:`_HTTPSource` and the SSRF allow-list
in :func:`_validate_http_url`. Per RFC 3986 section 3.1 URI schemes
are case-insensitive, so any uppercase / mixed-case variant has to
route through the same validator as ``http`` / ``https``.

Non-string inputs (``None``, ``bytes``, ``os.PathLike``, file-like
objects) return ``False`` so callers can drop the surrounding
``isinstance(_, str)`` check where they want to. Issues #2323 / #2332.
"""
if not isinstance(path, str):
return False
try:
scheme = urlparse(path).scheme
except (ValueError, TypeError):
if not isinstance(source, str) or not source:
return False
return scheme.lower() in ('http', 'https')
# ``urlparse`` strips off the scheme cleanly even for unusual inputs
# (e.g. ``HTTP:`` with no ``//``) and avoids the prefix-tuple trap.
return urlparse(source).scheme.lower() in ('http', 'https')


# Back-compat alias: earlier patches (#2323) shipped this same helper under
# the name ``_is_http_url`` and downstream tests / re-exports still use that
# name. Keep the alias so importers and the regression tests stay green.
_is_http_url = _is_http_source


def _is_fsspec_uri(path: str) -> bool:
"""Check if a path is a fsspec-compatible URI (not http/https/local).

Excludes http(s) case-insensitively so uppercase URLs cannot dodge the
SSRF allow-list and pinned DNS in :class:`_HTTPSource` (issue #2323).
SSRF allow-list and pinned DNS in :class:`_HTTPSource` (issues #2323 /
#2332).
"""
if not isinstance(path, str):
return False
if _is_http_url(path):
if _is_http_source(path):
return False
return '://' in path

Expand Down Expand Up @@ -1658,7 +1671,7 @@ def _open_source(source):
raise TypeError(
f"source must be a str path/URL or a binary file-like object "
f"with read+seek methods, got {type(source).__name__}")
if _is_http_url(source):
if _is_http_source(source):
return _HTTPSource(source)
if _is_fsspec_uri(source):
return _CloudSource(source)
Expand Down
24 changes: 18 additions & 6 deletions xrspatial/geotiff/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,11 @@
# (``_write``, ``_write_streaming``) and external importers (the
# ``_writers`` subpackage, tests, ``_gpu_decode``) keep using the
# ``xrspatial.geotiff._writer`` import path.
from ._sources import _is_http_source
from ._write_layout import (BO, _assemble_cog_layout, _assemble_standard_layout, # noqa: F401
_assemble_tiff, _build_ifd, _compute_classic_ifd_overhead,
_float_to_rational, _pack_tag_value, _promote_offsets_to_long8,
_serialize_tag_value, _should_use_bigtiff_streaming)
# Canonical case-insensitive http(s) check (#2323) so the writer-side
# fsspec gate cannot be tricked by uppercase URLs.
from ._sources import _is_http_url as _is_http_url_canonical

# Tag IDs the writer must never accept from ``extra_tags``. NewSubfileType
# (254) is a per-IFD status flag the writer emits on its own for overview
Expand Down Expand Up @@ -1243,12 +1241,16 @@ def _write_streaming(dask_data, path: str, *,
def _is_fsspec_uri(path) -> bool:
"""Check if a path is a fsspec-compatible URI (string only).

Excludes http(s) case-insensitively so uppercase URLs are not routed
through fsspec on the writer side (issue #2323).
HTTP(S) URLs are deliberately excluded here so the writer can raise
a typed "writes not supported over HTTP" error instead of handing
the URL to fsspec. Uses :func:`_sources._is_http_source` so the
HTTP detection is case-insensitive (RFC 3986); without that, an
uppercase ``HTTP://...`` slipped past this check and into fsspec.
Issues #2323 / #2332.
"""
if not isinstance(path, str):
return False
if _is_http_url_canonical(path):
if _is_http_source(path):
return False
return '://' in path

Expand Down Expand Up @@ -1280,6 +1282,16 @@ def _write_bytes(file_bytes: bytes | bytearray, path) -> None:
path.write(file_bytes)
return

# Reject HTTP(S) write targets with a typed error before the local
# file path tries to treat the URL as a filename. ``_is_http_source``
# is case-insensitive so ``HTTP://...`` reports the same friendly
# error as ``http://...``. Issue #2332.
if isinstance(path, str) and _is_http_source(path):
raise NotImplementedError(
f"Writes are not supported over HTTP(S). Got {path!r}. "
"Write to a local path or an fsspec-supported cloud URL "
"(s3://, gs://, az://, ...) instead.")

if _is_fsspec_uri(path):
try:
import fsspec
Expand Down
Loading
Loading