diff --git a/src/adcp/validation/schema_loader.py b/src/adcp/validation/schema_loader.py index a1e056faa..1b2c7a141 100644 --- a/src/adcp/validation/schema_loader.py +++ b/src/adcp/validation/schema_loader.py @@ -2,15 +2,17 @@ Loads the bundled per-tool schemas shipped with the SDK plus the ``core/`` schemas that async response variants ``$ref``, then compiles validators -lazily by ``(tool_name, direction)``. +lazily by ``(tool_name, direction, bundle_key)``. Schemas live under a per-version bundle key (see :func:`adcp.validation.version.resolve_bundle_key`) so multiple AdCP spec -versions can coexist on disk. The loader today resolves the cache for the -SDK's pinned ``ADCP_VERSION``; the per-request version arg lands in -Stage 2. +versions can coexist. Callers pass an optional ``version`` to +:func:`get_validator`; ``None`` defaults to the SDK's compile-time pin +(``ADCP_VERSION``). Each bundle key gets its own ``_LoaderState`` — file +index, compiled validators, core registry — so cross-version traffic +doesn't share compilation state. -Discovery paths (first hit wins): +Discovery paths (first hit wins, per bundle key): * **Installed package** — ``importlib.resources.files("adcp") / "_schemas" / {bundle_key}`` populated by ``scripts/bundle_schemas.py`` before wheel @@ -99,15 +101,24 @@ def _resolve_schema_root(bundle_key: str | None = None) -> _SchemaRoot | None: class _LoaderState: - def __init__(self, root: _SchemaRoot) -> None: + def __init__(self, root: _SchemaRoot, bundle_key: str) -> None: self.root = root + self.bundle_key = bundle_key self.file_index: dict[tuple[str, Direction], Path] = {} self.compiled: dict[tuple[str, Direction], Any] = {} self.registry: dict[str, dict[str, Any]] = {} self._core_loaded = False -_state: _LoaderState | None = None +# Per-bundle-key state. Each version (``3.0``, ``2.5``, ``3.1.0-beta.1``) +# gets its own file index, compiled validator cache, and core registry — +# shared compilation state across versions would let a ``$ref`` from a +# v2.5 schema resolve to a v3.0 core type with the same ``$id``. +_states: dict[str, _LoaderState] = {} +# Negative cache: bundle keys we've already tried to resolve and found +# nothing on disk for. Distinguishes a true negative from "not yet looked +# up" so we don't walk the filesystem twice per missing version. +_state_misses: set[str] = set() def _walk_json(dir_: Path) -> list[Path]: @@ -146,23 +157,48 @@ def _build_index(root: _SchemaRoot) -> dict[tuple[str, Direction], Path]: return index -def _ensure_state() -> _LoaderState | None: - global _state - if _state is not None: - return _state +def _resolve_bundle_key_for_version(version: str | None) -> str: + """Resolve a caller-supplied version (or ``None``) to a bundle key.""" + if version is None: + return _sdk_pinned_bundle_key() + return resolve_bundle_key(version) + + +def _ensure_state(version: str | None = None) -> _LoaderState | None: + """Return the loader state for ``version`` (default: SDK pin). + + Each bundle key is initialized once and cached for the process + lifetime. ``None`` is returned when the bundle isn't on disk for + this version — callers degrade to ``skipped`` validation, same as + pre-Stage-2 behaviour when the cache is missing entirely. + """ + bundle_key = _resolve_bundle_key_for_version(version) + cached = _states.get(bundle_key) + if cached is not None: + return cached + if bundle_key in _state_misses: + return None with _init_lock: # Double-checked pattern: re-read inside the lock in case another # thread initialized while we were waiting. - if _state is not None: - return _state - root = _resolve_schema_root() + cached = _states.get(bundle_key) + if cached is not None: + return cached + if bundle_key in _state_misses: + return None + root = _resolve_schema_root(bundle_key) if root is None: - logger.debug("AdCP schemas not found; schema validation will skip all tools") + logger.debug( + "AdCP schemas not found for bundle_key=%s; validation will skip " + "all tools for this version", + bundle_key, + ) + _state_misses.add(bundle_key) return None - new_state = _LoaderState(root) + new_state = _LoaderState(root, bundle_key) new_state.file_index = _build_index(root) - _state = new_state - return _state + _states[bundle_key] = new_state + return new_state def _load_core_registry(state: _LoaderState) -> None: @@ -216,14 +252,26 @@ def _make_ref_resolver(state: _LoaderState, base_file: Path, schema: dict[str, A return RefResolver(base_uri=base_uri, referrer=schema, store=dict(state.registry)) -def get_validator(tool_name: str, direction: Direction) -> Any | None: - """Return a compiled validator for ``(tool_name, direction)`` or ``None``. - - ``None`` means no schema ships for this pair — callers should skip - validation (e.g., custom tools outside the AdCP catalog, or sync-only - tools asked for an async variant that doesn't exist). +def get_validator( + tool_name: str, + direction: Direction, + *, + version: str | None = None, +) -> Any | None: + """Return a compiled validator for ``(tool_name, direction, version)``. + + Returns ``None`` when no schema ships for this pair — callers should + skip validation (e.g., custom tools outside the AdCP catalog, or + sync-only tools asked for an async variant that doesn't exist, or a + version whose bundle isn't on disk). + + ``version=None`` resolves to the SDK's compile-time pin + (``ADCP_VERSION``). Pass a wire-version string (e.g. ``"3.0.7"``, + ``"2.5"``, ``"3.1.0-beta.1"``) to validate against a non-current + schema — :func:`adcp.validation.version.resolve_bundle_key` collapses + it to the cache key. """ - state = _ensure_state() + state = _ensure_state(version) if state is None: return None key = (tool_name, direction) @@ -264,9 +312,9 @@ def get_validator(tool_name: str, direction: Direction) -> Any | None: return validator -def list_validator_keys() -> list[str]: +def list_validator_keys(*, version: str | None = None) -> list[str]: """Every ``tool::direction`` pair with a shipped schema. Used by tests.""" - state = _ensure_state() + state = _ensure_state(version) if state is None: return [] return sorted(f"{tool}::{direction}" for (tool, direction) in state.file_index) @@ -274,5 +322,5 @@ def list_validator_keys() -> list[str]: def _reset_for_tests() -> None: """Clear cached state so a fresh resolve runs. Test-only.""" - global _state - _state = None + _states.clear() + _state_misses.clear() diff --git a/src/adcp/validation/schema_validator.py b/src/adcp/validation/schema_validator.py index 102900b63..ca0120939 100644 --- a/src/adcp/validation/schema_validator.py +++ b/src/adcp/validation/schema_validator.py @@ -293,9 +293,21 @@ def _iter_errors_bounded(validator: Any, payload: Any) -> list[Any]: return errors -def validate_request(tool_name: str, payload: Any) -> ValidationOutcome: - """Validate an outgoing request against ``{tool}-request.json``.""" - validator = get_validator(tool_name, "request") +def validate_request( + tool_name: str, + payload: Any, + *, + version: str | None = None, +) -> ValidationOutcome: + """Validate an outgoing request against ``{tool}-request.json``. + + ``version=None`` validates against the SDK's compile-time-pinned + schema. Pass an explicit wire version (e.g. ``"2.5"``, ``"3.0.7"``) + to validate against a non-current bundle — the dispatcher uses this + when the buyer claims a legacy ``adcp_major_version`` so the request + is checked against the schema the buyer actually targets. + """ + validator = get_validator(tool_name, "request", version=version) if validator is None: return _OK_SKIPPED if _count_nodes(payload, _MAX_PAYLOAD_NODES) >= _MAX_PAYLOAD_NODES: @@ -343,13 +355,22 @@ def _select_response_variant(payload: Any) -> ResponseVariant: return "sync" -def validate_response(tool_name: str, payload: Any) -> ValidationOutcome: - """Validate an incoming response, selecting the variant by payload shape.""" +def validate_response( + tool_name: str, + payload: Any, + *, + version: str | None = None, +) -> ValidationOutcome: + """Validate an incoming response, selecting the variant by payload shape. + + ``version`` semantics match :func:`validate_request` — defaults to the + SDK pin; pass a wire version to validate against a legacy schema. + """ variant: ResponseVariant = _select_response_variant(payload) - validator = get_validator(tool_name, variant) + validator = get_validator(tool_name, variant, version=version) used_variant: Direction = variant if validator is None and variant != "sync": - validator = get_validator(tool_name, "sync") + validator = get_validator(tool_name, "sync", version=version) used_variant = "sync" if validator is None: return _OK_SKIPPED diff --git a/src/adcp/validation/version.py b/src/adcp/validation/version.py index 7898e8c10..56372b412 100644 --- a/src/adcp/validation/version.py +++ b/src/adcp/validation/version.py @@ -22,24 +22,40 @@ # ``MAJOR.MINOR.PATCH`` with an optional ``-PRERELEASE`` tail. Build # metadata (``+SHA``) is intentionally not in the SDK contract — adopters # pin to release identifiers. -_SEMVER_RE = re.compile( +_FULL_SEMVER_RE = re.compile( r"^(?P\d+)\.(?P\d+)\.(?P\d+)(?:-(?P[0-9A-Za-z.-]+))?$" ) +# Bare ``MAJOR.MINOR`` — already a bundle key. The wire's +# ``adcp_version`` field (3.1+) is emitted at this precision per the +# version-envelope spec, so callers passing it through verbatim land here. +_MAJOR_MINOR_RE = re.compile(r"^(?P\d+)\.(?P\d+)$") def resolve_bundle_key(version: str) -> str: """Collapse a version string to its on-disk cache key. - Raises ``ValueError`` for non-semver inputs — the schema fetch - pipeline pins on real release identifiers, so a malformed version - here is a real bug in the caller, not user input. + Accepts: + + * ``MAJOR.MINOR.PATCH`` — collapsed to ``MAJOR.MINOR``. + * ``MAJOR.MINOR.PATCH-PRERELEASE`` — kept exact; prereleases ship with + breaking changes vs. the matching stable, so each one is its own bucket. + * ``MAJOR.MINOR`` — passed through as-is (already a bundle key; matches + the wire-level ``adcp_version`` field's release-precision shape). + + Raises ``ValueError`` for anything else — adopters pin on real release + identifiers, so a malformed version is a real bug. """ - match = _SEMVER_RE.match(version.strip()) - if match is None: - raise ValueError( - f"resolve_bundle_key: {version!r} is not a valid semver " - "(expected MAJOR.MINOR.PATCH[-PRERELEASE])" - ) - if match.group("prerelease"): - return version.strip() - return f"{match.group('major')}.{match.group('minor')}" + stripped = version.strip() + full = _FULL_SEMVER_RE.match(stripped) + if full is not None: + if full.group("prerelease"): + return stripped + return f"{full.group('major')}.{full.group('minor')}" + mm = _MAJOR_MINOR_RE.match(stripped) + if mm is not None: + return stripped + raise ValueError( + f"resolve_bundle_key: {version!r} is not a valid version " + "(expected MAJOR.MINOR, MAJOR.MINOR.PATCH, or " + "MAJOR.MINOR.PATCH-PRERELEASE)" + ) diff --git a/tests/test_schema_loader_per_version.py b/tests/test_schema_loader_per_version.py new file mode 100644 index 000000000..f13198bc8 --- /dev/null +++ b/tests/test_schema_loader_per_version.py @@ -0,0 +1,214 @@ +"""Stage 2 tests: per-version validator loader. + +Exercises the ``version=`` kwarg on :func:`get_validator`, +:func:`validate_request`, and :func:`validate_response`. Builds a +synthetic legacy bundle in ``tmp_path`` and monkeypatches the loader's +resolver to find it — keeps the test isolated from the repo's working +tree (which CI sometimes runs against an installed wheel where the +packaged path wins) and from concurrent fixture cleanup. +""" + +from __future__ import annotations + +import json +import shutil +from pathlib import Path + +import pytest + +from adcp.validation import schema_loader as _loader_mod +from adcp.validation.schema_loader import ( + _reset_for_tests, + _resolve_schema_root, + _SchemaRoot, + _sdk_pinned_bundle_key, + get_validator, + list_validator_keys, +) +from adcp.validation.schema_validator import validate_request, validate_response + + +@pytest.fixture +def synthetic_legacy_bundle(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> tuple[str, Path]: + """Yield a ``(bundle_key, schemas_root)`` pair for a synthetic legacy + bundle laid out under ``tmp_path``. + + The loader's resolver is monkeypatched to return our synthetic root + when asked for the legacy bundle key. The SDK-pinned key falls back + to the real resolver. This isolates the test from the repo's working + tree and from the installed-wheel discovery path (which would + otherwise win). + """ + legacy_key = "2.5" + legacy_root = tmp_path / "cache" / legacy_key + bundled = legacy_root / "bundled" + core = legacy_root / "core" + bundled.mkdir(parents=True) + core.mkdir() + + request_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Synthetic Tool Request", + "type": "object", + "required": ["legacy_field"], + "properties": { + "legacy_field": {"type": "string"}, + "extra_field": {"type": "integer"}, + }, + "additionalProperties": False, + } + response_schema = { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Synthetic Tool Response", + "type": "object", + "required": ["result"], + "properties": {"result": {"type": "string"}}, + } + + (bundled / "synthetic-tool-request.json").write_text( + json.dumps(request_schema), encoding="utf-8" + ) + (bundled / "synthetic-tool-response.json").write_text( + json.dumps(response_schema), encoding="utf-8" + ) + + real_resolve = _resolve_schema_root + + def _fake_resolve(bundle_key: str | None = None) -> _SchemaRoot | None: + if bundle_key == legacy_key: + return _SchemaRoot(legacy_root) + return real_resolve(bundle_key) + + monkeypatch.setattr(_loader_mod, "_resolve_schema_root", _fake_resolve) + + _reset_for_tests() + try: + yield legacy_key, legacy_root + finally: + _reset_for_tests() + # ``tmp_path`` is cleaned up by pytest, but a non-empty leftover + # from a partial test run shouldn't break the next session. + shutil.rmtree(legacy_root, ignore_errors=True) + + +def test_resolve_schema_root_returns_different_paths_per_bundle_key( + synthetic_legacy_bundle: tuple[str, Path], +) -> None: + legacy_key, legacy_path = synthetic_legacy_bundle + sdk_key = _sdk_pinned_bundle_key() + assert sdk_key != legacy_key, ( + "Test fixture assumes the SDK pin isn't 2.5 — pick a different " + "synthetic key if the SDK ever pins to 2.5.x" + ) + + # Go through the module attribute so the fixture's monkeypatch fires. + legacy_root = _loader_mod._resolve_schema_root(legacy_key) + sdk_root = _loader_mod._resolve_schema_root(sdk_key) + + assert legacy_root is not None + assert sdk_root is not None + assert legacy_root.root == legacy_path + assert legacy_root.root != sdk_root.root + + +def test_get_validator_per_version_finds_only_its_own_tools( + synthetic_legacy_bundle: tuple[str, Path], +) -> None: + legacy_key, _ = synthetic_legacy_bundle + + # Legacy bundle has only synthetic_tool. + legacy_keys = list_validator_keys(version=legacy_key) + assert "synthetic_tool::request" in legacy_keys + assert "synthetic_tool::sync" in legacy_keys + + # SDK pin doesn't have synthetic_tool. + sdk_keys = list_validator_keys() # None → SDK pin + assert "synthetic_tool::request" not in sdk_keys + + +def test_get_validator_returns_independent_validators_per_version( + synthetic_legacy_bundle: tuple[str, Path], +) -> None: + legacy_key, _ = synthetic_legacy_bundle + + # Legacy validator: synthetic_tool exists. + legacy_validator = get_validator("synthetic_tool", "request", version=legacy_key) + assert legacy_validator is not None + + # SDK pin: synthetic_tool does NOT exist; same call returns None. + sdk_validator = get_validator("synthetic_tool", "request") + assert sdk_validator is None + + +def test_get_validator_same_tool_different_versions_compiles_separately( + synthetic_legacy_bundle: tuple[str, Path], +) -> None: + """Pick a tool that exists in the SDK pin; assert a legacy-bundle call + that *doesn't* ship that tool returns None — proving the loader keys + on bundle, not just tool name. + """ + legacy_key, _ = synthetic_legacy_bundle + + # ``get_products`` exists in the SDK pin (3.0+). + sdk_validator = get_validator("get_products", "request") + assert sdk_validator is not None + + # The synthetic legacy bundle doesn't ship get_products → None. + legacy_validator = get_validator("get_products", "request", version=legacy_key) + assert legacy_validator is None + + +def test_validate_request_threads_version_through( + synthetic_legacy_bundle: tuple[str, Path], +) -> None: + legacy_key, _ = synthetic_legacy_bundle + + # Legacy schema requires ``legacy_field`` and forbids unknown keys. + valid = validate_request("synthetic_tool", {"legacy_field": "hello"}, version=legacy_key) + assert valid.valid + + missing_required = validate_request("synthetic_tool", {"extra_field": 7}, version=legacy_key) + assert not missing_required.valid + assert any("legacy_field" in (issue.message or "") for issue in missing_required.issues) + + extra_field_rejected = validate_request( + "synthetic_tool", + {"legacy_field": "x", "not_in_schema": 1}, + version=legacy_key, + ) + assert not extra_field_rejected.valid + + +def test_validate_request_unknown_version_skips_safely( + synthetic_legacy_bundle: tuple[str, Path], +) -> None: + """A version we don't have on disk degrades to ``skipped`` rather + than crashing — same contract as a missing schema for the SDK pin.""" + outcome = validate_request("synthetic_tool", {"x": 1}, version="9.9.9") + # ``skipped`` semantics: ``valid=True`` with no issues. + assert outcome.valid + assert outcome.issues == [] + + +def test_validate_response_threads_version_through( + synthetic_legacy_bundle: tuple[str, Path], +) -> None: + legacy_key, _ = synthetic_legacy_bundle + + valid = validate_response("synthetic_tool", {"result": "ok"}, version=legacy_key) + assert valid.valid + + bad = validate_response("synthetic_tool", {"result": 42}, version=legacy_key) + assert not bad.valid + + +def test_default_version_unchanged_when_arg_omitted( + synthetic_legacy_bundle: tuple[str, Path], +) -> None: + """``version=None`` (or omitted) keeps the SDK-pin behaviour exactly. + Regression guard: Stage 2 must not change validator selection for + existing call sites that don't pass ``version=``.""" + # Pick a tool that ships in the SDK pin. + explicit_default = get_validator("get_products", "request", version=None) + omitted = get_validator("get_products", "request") + assert explicit_default is omitted diff --git a/tests/test_sync_schemas.py b/tests/test_sync_schemas.py index 6f9842bc6..3c28c9196 100644 --- a/tests/test_sync_schemas.py +++ b/tests/test_sync_schemas.py @@ -122,7 +122,7 @@ def test_caller_resolves_bundle_key_from_target_not_effective(self) -> None: from adcp.validation.version import resolve_bundle_key # ``effective_version`` after a 404-fallback: - with pytest.raises(ValueError, match="not a valid semver"): + with pytest.raises(ValueError, match="not a valid version"): resolve_bundle_key("latest") # ``target_version`` (the SDK pin) always parses: diff --git a/tests/test_validation_version.py b/tests/test_validation_version.py index 5242e617d..82f20d639 100644 --- a/tests/test_validation_version.py +++ b/tests/test_validation_version.py @@ -29,12 +29,22 @@ def test_resolve_bundle_key_strips_whitespace() -> None: assert resolve_bundle_key(" 3.0.7 ") == "3.0" +def test_resolve_bundle_key_accepts_major_minor_pass_through() -> None: + """Bare ``MAJOR.MINOR`` is already a bundle key — passed through. + + The wire envelope's ``adcp_version`` field (3.1+) is emitted at this + precision, so the dispatcher can hand it straight to the loader. + """ + assert resolve_bundle_key("3.0") == "3.0" + assert resolve_bundle_key("2.5") == "2.5" + + def test_resolve_bundle_key_rejects_garbage() -> None: - with pytest.raises(ValueError, match="not a valid semver"): + with pytest.raises(ValueError, match="not a valid version"): resolve_bundle_key("latest") - with pytest.raises(ValueError, match="not a valid semver"): - resolve_bundle_key("3.0") - with pytest.raises(ValueError, match="not a valid semver"): + with pytest.raises(ValueError, match="not a valid version"): resolve_bundle_key("v3.0.7") - with pytest.raises(ValueError, match="not a valid semver"): + with pytest.raises(ValueError, match="not a valid version"): + resolve_bundle_key("3") + with pytest.raises(ValueError, match="not a valid version"): resolve_bundle_key("")