SoundMindsAI · SoundMindsAI · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
@@ -109,10 +109,20 @@ backend/
                   judgment.py from feat_llm_judgments)
     services/    use-case orchestrators — cluster.py from infra_adapter_elastic;
                  study_state.py (state machine + FR-7 protection listener,
-                 feat_study_lifecycle Phase 2)
+                 feat_study_lifecycle Phase 2);
+                 study_confidence.py (async glue that runs the 4-query
+                 read pattern from feat_pr_metric_confidence FR-2 and
+                 hands pre-fetched data to the pure orchestrator —
+                 consumed by studies._detail, the open_pr worker, and
+                 the digest worker)
     domain/      pure business logic — query/render.py from
                  infra_adapter_elastic; study/{search_space,template_validator,
                  csv_parser}.py from feat_study_lifecycle Phase 2;
+                 study/confidence.py (feat_pr_metric_confidence —
+                 ConfidenceShape Pydantic model + 7 sub-shapes + bootstrap
+                 CI / runner-up gap / late-trial 1σ / convergence regime /
+                 per-query outcome helpers; pure-Python orchestrator
+                 returning None on every FR-7 degraded path);
                  git/{redaction,validation}.py from feat_github_pr_worker
                  (GitHub PAT redaction + repo_url + config_path validators)
     adapters/    engine adapters — protocol.py (SearchAdapter Protocol +
@@ -180,7 +190,10 @@ migrations/      Alembic config + versions/ (0001 baseline + 0002 clusters
                  + 0003 study_lifecycle_schema + 0004_judgments + 0005_digests
                  + 0006 proposals_pr_url_idx + 0007 conversations_messages +
                  0008–0013 search_vector + GIN indexes from
-                 feat_data_table_primitive)
+                 feat_data_table_primitive + 0014 clusters_target_filter
+                 from feat_cluster_target_filter + 0015 trials_per_query_metrics
+                 from feat_pr_metric_confidence — nullable JSONB column +
+                 CHECK constraint enforcing IS NULL OR jsonb_typeof = 'object')
 docs/            00_overview / 01_architecture / 02_product / 03_runbooks /
                  04_security / 05_quality / 08_guides
 ```

@@ -16,7 +16,7 @@
 
 from __future__ import annotations
 
-from typing import Annotated
+from typing import Annotated, Any
 
 from fastapi import APIRouter, Depends, HTTPException, status
 from pydantic import BaseModel, ConfigDict, Field
@@ -77,6 +77,24 @@ class SeedCompletedStudyRequest(BaseModel):
             "aria-disabled-button + tooltip path."
         ),
     )
+    winner_per_query: dict[str, dict[str, Any]] | None = Field(
+        default=None,
+        description=(
+            "Optional per-query metrics dict to populate on the winner "
+            "trial. Shape: `{query_id: {metric_token: float}}` where "
+            "metric_token matches what `scoring.score()` emits (e.g. "
+            "`ndcg@10`). Set alongside `runner_up_per_query` to drive the "
+            "ConfidencePanel happy path on `/studies/[id]`. When omitted, "
+            "the seeded trials have `per_query_metrics IS NULL` (the "
+            "pre-feat_pr_metric_confidence shape)."
+        ),
+    )
+    runner_up_per_query: dict[str, dict[str, Any]] | None = Field(
+        default=None,
+        description=(
+            "Optional per-query metrics for the runner-up trial; pairs with `winner_per_query`."
+        ),
+    )
 
 
 class SeedCompletedStudyResponse(BaseModel):
@@ -125,6 +143,8 @@ async def seed_completed_study(  # pragma: no cover  - integration only
         template_id=body.template_id,
         judgment_list_id=body.judgment_list_id,
         with_pending_proposal=body.with_pending_proposal,
+        winner_per_query=body.winner_per_query,
+        runner_up_per_query=body.runner_up_per_query,
     )
     await db.commit()
     return SeedCompletedStudyResponse(

@@ -24,6 +24,12 @@
 
 from backend.app.adapters.protocol import TargetInfo
 from backend.app.core.settings import get_settings
+from backend.app.domain.study.confidence import ConfidenceShape as ConfidenceShape
+
+# ``ConfidenceShape`` is defined in :mod:`backend.app.domain.study.confidence`
+# (the canonical assembler module per Story 1.3). The explicit ``as`` re-export
+# above keeps it importable via ``from backend.app.api.v1.schemas import
+# ConfidenceShape`` under mypy strict's ``no_implicit_reexport``.
 
 EngineType = Literal["elasticsearch", "opensearch"]
 """Response-only: values are guaranteed by service-layer validation before the
@@ -634,6 +640,13 @@ class StudyDetail(BaseModel):
     started_at: datetime | None
     completed_at: datetime | None
     trials_summary: TrialsSummaryShape
+    confidence: ConfidenceShape | None = None
+    """Per-study metric-confidence analytics (feat_pr_metric_confidence FR-5a).
+
+    ``None`` when the study has no winner trial (still running or
+    ``best_trial_id`` points at a deleted row — AC-3a). Otherwise a partial
+    or full :class:`ConfidenceShape` per FR-7's graceful-degradation
+    contract."""
 
 
 class StudySummary(BaseModel):

@@ -63,6 +63,7 @@
     validate_against_template,
 )
 from backend.app.services import study_state
+from backend.app.services.study_confidence import fetch_study_confidence
 
 router = APIRouter()
 
@@ -117,6 +118,7 @@ def _decode_trial_cursor(raw: str, sort_key: str) -> tuple[Any, str]:
 
 async def _detail(db: AsyncSession, row: Study) -> StudyDetail:
     summary = await repo.aggregate_trials_summary(db, row.id)
+    confidence = await fetch_study_confidence(db, row)
     return StudyDetail(
         id=row.id,
         name=row.name,
@@ -145,6 +147,7 @@ async def _detail(db: AsyncSession, row: Study) -> StudyDetail:
             pruned=summary.pruned,
             best_primary_metric=summary.best_primary_metric,
         ),
+        confidence=confidence,
     )
 
 

@@ -13,6 +13,19 @@
 The ``trials_study_metric`` index on ``(study_id, primary_metric DESC NULLS
 LAST)`` is created in the migration (Story 1.2) — not declared at the ORM
 level — so the ``DESC NULLS LAST`` ordering survives ``--autogenerate``.
+
+The ``per_query_metrics`` JSONB column (nullable; added by migration
+``0015_trials_per_query_metrics`` for feat_pr_metric_confidence) carries the
+per-query pytrec_eval scores from ``scoring.py::score()``'s ``per_query``
+dict. Shape: ``{query_id: {metric_token: float}}`` where ``metric_token`` is
+the user-facing token emitted by :func:`backend.app.eval.scoring.score` —
+i.e. ``@<k>``-suffixed for cutoff-aware metrics (``ndcg@10``, ``map@10``,
+``precision@10``, ``recall@10``) and bare names for cutoff-free metrics
+(``mrr``, plain ``map``). The base name (everything before any ``@``) is
+constrained to ``MetricCatalog`` (``ndcg``, ``map``, ``precision``,
+``recall``, ``mrr``). The ``trials_per_query_metrics_object_check`` CHECK
+constraint enforces NULL-or-object at the DB level (since the write path is
+the Arq ``run_trial`` worker, not a Pydantic-validated HTTP request).
 """
 
 from __future__ import annotations
@@ -36,6 +49,10 @@ class Trial(Base):
             "status IN ('complete', 'failed', 'pruned')",
             name="trials_status_check",
         ),
+        CheckConstraint(
+            "per_query_metrics IS NULL OR jsonb_typeof(per_query_metrics) = 'object'",
+            name="trials_per_query_metrics_object_check",
+        ),
     )
 
     id: Mapped[str] = mapped_column(String(36), primary_key=True)
@@ -62,6 +79,15 @@ class Trial(Base):
     """``{ndcg@10: ..., map: ..., p@10: ...}`` — every metric the study's
     objective enumerated, scored by ``backend/eval/scoring.py`` (lands in
     ``infra_optuna_eval``)."""
+    per_query_metrics: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True)
+    """Per-query pytrec_eval scores from ``scoring.py::score()``'s
+    ``per_query`` dict, persisted on every successful trial (NULL on
+    failure/pruned and on trials predating migration 0015). Shape:
+    ``{query_id: {metric_token: float}}`` using the tokens ``score()`` emits
+    (e.g. ``ndcg@10``, ``precision@10``, bare ``mrr``). Consumed by
+    ``backend.app.domain.study.confidence`` to compute the
+    ``ConfidenceShape`` surfaced on ``StudyDetail`` + PR body + digest
+    narrative (feat_pr_metric_confidence)."""
     duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
     """Wall-clock from ``study.ask()`` to ``study.tell()`` for this trial."""
     status: Mapped[str] = mapped_column(Text, nullable=False)