From 88dca3114688e5ab82cf6ef2c1c544cf39e83f5c Mon Sep 17 00:00:00 2001
From: Pengfei Hu <pengfei@threemoonslab.com>
Date: Mon, 22 Jun 2026 11:09:10 -0700
Subject: [PATCH 1/3] Defer undeclared tool-surface changes from check to
 verify
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`shipgate check` (and the MCP `shipgate.check` tool) returned `allow` on a
capability-adding diff that touches a tool/capability surface the manifest does
not declare — or when there is no manifest — even though the trigger already
flags the diff in scope. That is a false clear: only `verify` compiles the
capability delta, so an agent acting on a boundary `allow` could ship a change
`verify` would block.

main already defers manifest-declared tool sources (`capability_surfaces_changed`
→ `coverage_gap`). This extends that coverage to the undeclared / no-manifest
case:

- Fire only when a content-driven `run_shipgate` trigger rule matched (e.g.
  TRIGGER-MCP-EXPORT-CHANGED), never the blanket `force_run` rule — so a
  docs-only edit in an opted-in repo stays `allow` (no noise).
- Require a changed file the boundary evaluator does not itself inspect
  (`not is_boundary_path`), so a clean `allow` over `.codex/config.toml`,
  `AGENTS.md`, or the Shipgate workflow stays authoritative.
- Escalate `allow` → `warn` and route to `agents-shipgate detect`
  (declare-then-verify), because `verify` only gates *declared* surfaces;
  routing straight to verify would falsely reassure on a surface it never scans.

Never downgrades a real boundary `block` / `require_review`. The MCP
`shipgate.check` tool inherits the fix automatically via the shared
`build_codex_agent_result` path. No schema or check-ID changes.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/agents_shipgate/core/codex_boundary.py | 105 ++++++++++++++++-
 tests/test_codex_boundary_check.py         | 131 +++++++++++++++++++++
 2 files changed, 230 insertions(+), 6 deletions(-)

diff --git a/src/agents_shipgate/core/codex_boundary.py b/src/agents_shipgate/core/codex_boundary.py
index 7ef3de9b..aab63100 100644
--- a/src/agents_shipgate/core/codex_boundary.py
+++ b/src/agents_shipgate/core/codex_boundary.py
@@ -458,13 +458,36 @@ def add(rule_id: str, *, path: str | None, evidence: dict[str, Any]) -> None:
     decision = _decision_for(violations, release_decision=release_decision)
 
     # Coverage gap: check is boundary-only, so a clean ``allow`` over a diff
-    # that touches a manifest-declared tool surface would silently green-light
-    # a capability change that only ``verify`` gates. Escalate to ``warn`` and
-    # route to verify instead. Gated on ``release_decision is None`` because a
-    # provided release decision means the full capability scan already ran.
+    # that touches a tool/capability surface would silently green-light a
+    # capability change that only ``verify`` gates. Escalate to ``warn`` and
+    # route onward. Gated on ``release_decision is None`` because a provided
+    # release decision means the full capability scan already ran.
+    #
+    # Two sub-cases:
+    #   * ``coverage_gap`` — the changed surface is one the manifest DECLARES as
+    #     a tool source, so ``verify`` will gate it: route straight to verify.
+    #   * ``undeclared_gap`` — the trigger flags the diff as a tool-surface
+    #     change but the manifest declares no matching source (or there is no
+    #     manifest), so ``verify`` cannot gate it yet: route to declare-then-
+    #     verify instead of a misleading clean ``verify`` over a surface it
+    #     never scans.
+    #
+    # The undeclared case requires a changed file the boundary evaluator does
+    # NOT itself inspect (``not is_boundary_path``): a clean ``allow`` over a
+    # purely Codex-local surface (``.codex/config.toml``, ``AGENTS.md``, the
+    # Shipgate workflow) is authoritative — ``check`` already evaluated it — so
+    # it is not a coverage gap. Same exclusion ``_declared_tool_surfaces_changed``
+    # applies to declared sources.
+    boundary_clean_allow = decision == "allow" and release_decision is None
     coverage_surfaces = sorted(dict.fromkeys(capability_surfaces_changed or []))
-    coverage_gap = decision == "allow" and release_decision is None and bool(coverage_surfaces)
-    if coverage_gap:
+    coverage_gap = boundary_clean_allow and bool(coverage_surfaces)
+    undeclared_gap = (
+        boundary_clean_allow
+        and not coverage_surfaces
+        and _trigger_in_scope(trigger)
+        and any(not is_boundary_path(path) for path in changed_files)
+    )
+    if coverage_gap or undeclared_gap:
         decision = "warn"
 
     risk_level = _risk_for(violations)
@@ -484,6 +507,12 @@ def add(rule_id: str, *, path: str | None, evidence: dict[str, Any]) -> None:
         diagnostics = [*diagnostics, _coverage_diagnostic(coverage_surfaces)]
         trace = [*_trace_for(policy, decision, violations), _coverage_trace(coverage_surfaces)]
         suggested_fixes = [_VERIFY_COMMAND]
+    elif undeclared_gap:
+        first_next_action = _undeclared_next_action()
+        summary = _undeclared_summary()
+        diagnostics = [*diagnostics, _undeclared_diagnostic()]
+        trace = [*_trace_for(policy, decision, violations), _undeclared_trace()]
+        suggested_fixes = [_DETECT_COMMAND, _VERIFY_COMMAND]
     else:
         first_next_action = _next_action_for(decision, violations, repair)
         summary = _summary_for(decision, violations)
@@ -1338,6 +1367,70 @@ def _risk_for(violations: list[AgentResultViolatedRule]) -> AgentResultRiskLevel
 # auto-detects the base (v0.13) and emits the boundary-result surface, so it
 # works for both the local working tree and committed refs.
 _VERIFY_COMMAND = "agents-shipgate verify --json"
+_DETECT_COMMAND = "agents-shipgate detect --json"
+
+
+def _trigger_in_scope(trigger: dict[str, Any] | None) -> bool:
+    """True when a CONTENT-driven trigger rule flags a tool/capability-surface change.
+
+    Keys on a matched rule whose ``action`` is ``run_shipgate`` — the
+    path/diff-driven tool-surface rules (MCP export, tool decorator, OpenAPI,
+    framework bump). Deliberately NOT the top-level ``run_shipgate`` boolean:
+    that is also ``True`` for the blanket ``force_run`` rule
+    (``TRIGGER-EXISTING-MANIFEST-PRESENT``), which fires on every diff in an
+    opted-in repo and would turn a docs-only edit into a coverage warn. ``None``
+    (a bare function call with no trigger context) is out of scope, so the
+    boundary evaluator keeps its prior behavior when called directly.
+    """
+    if not isinstance(trigger, dict):
+        return False
+    return any(
+        isinstance(rule, dict) and rule.get("action") == "run_shipgate"
+        for rule in trigger.get("matched_rules") or []
+    )
+
+
+def _undeclared_next_action() -> AgentResultNextAction:
+    return AgentResultNextAction(
+        actor="coding_agent",
+        kind="warn",
+        command=_DETECT_COMMAND,
+        why=(
+            "This diff changes a tool/capability surface that shipgate.yaml does not "
+            "declare, so neither check nor verify gates it yet. Declare the surface "
+            "(run detect or add it to tool_sources), then run verify before completing."
+        ),
+    )
+
+
+def _undeclared_summary() -> str:
+    return (
+        "No Codex boundary rule fired, but the diff changes a tool/capability surface "
+        "that shipgate.yaml does not declare, so verify cannot gate it yet. Declare it "
+        "(detect or tool_sources) and run verify before reporting completion."
+    )
+
+
+def _undeclared_diagnostic() -> AgentResultDiagnostic:
+    return AgentResultDiagnostic(
+        level="warning",
+        code="undeclared_capability_surface",
+        message=(
+            "A changed tool/capability surface is not declared in shipgate.yaml; "
+            "check is boundary-only and verify only gates declared surfaces. Declare "
+            "the surface, then verify."
+        ),
+    )
+
+
+def _undeclared_trace() -> AgentResultTraceEvent:
+    return AgentResultTraceEvent(
+        step="coverage",
+        summary=(
+            "boundary_only: in-scope diff changes an undeclared tool surface; "
+            "routed to detect + verify."
+        ),
+    )
 
 
 def _coverage_next_action() -> AgentResultNextAction:
diff --git a/tests/test_codex_boundary_check.py b/tests/test_codex_boundary_check.py
index 287e6228..505aba20 100644
--- a/tests/test_codex_boundary_check.py
+++ b/tests/test_codex_boundary_check.py
@@ -246,6 +246,137 @@ def test_check_does_not_warn_on_docs_change_in_opted_in_repo(tmp_path: Path) ->
     assert result.decision == "allow"
 
 
+# --- Undeclared coverage gap: the trigger flags a tool-surface change the -----
+# manifest does not declare (or there is no manifest). verify cannot gate an
+# undeclared surface, so route to declare-then-verify rather than a clean allow.
+
+# A trigger context where a content-driven tool-surface rule fired (what the
+# real trigger emits for an MCP/tool-export change), distinct from the blanket
+# manifest-present force_run rule.
+_SURFACE_TRIGGER = {
+    "run_shipgate": True,
+    "matched_rules": [
+        {"id": "TRIGGER-MCP-EXPORT-CHANGED", "action": "run_shipgate"}
+    ],
+}
+# Manifest-present force_run only: in scope for "run shipgate" but NOT a
+# tool-surface change, so it must not trip the undeclared coverage warn.
+_FORCE_RUN_TRIGGER = {
+    "run_shipgate": True,
+    "matched_rules": [
+        {"id": "TRIGGER-EXISTING-MANIFEST-PRESENT", "action": "force_run"}
+    ],
+}
+
+
+def test_undeclared_tool_surface_in_scope_trigger_warns_and_routes_to_detect(
+    tmp_path: Path,
+) -> None:
+    result = evaluate_codex_boundary_result(
+        workspace=tmp_path,
+        diff_text=_TOOL_SOURCE_DIFF,
+        agent="claude-code",
+        trigger=_SURFACE_TRIGGER,
+        capability_surfaces_changed=None,
+    )
+    payload = result.model_dump(mode="json", exclude_none=True)
+    _validate(payload)
+    # Was a bare allow before the fix; now a warn that routes to detect/declare.
+    assert payload["decision"] == "warn"
+    assert payload["completion_allowed"] is True
+    assert payload["must_stop"] is False
+    assert payload["first_next_action"]["kind"] == "warn"
+    assert payload["first_next_action"]["command"].startswith("agents-shipgate detect")
+    assert any(d["code"] == "undeclared_capability_surface" for d in payload["diagnostics"])
+    assert any(t["step"] == "coverage" for t in payload["trace"])
+    assert payload["suggested_fixes"][0].startswith("agents-shipgate detect")
+    assert any(fix.startswith("agents-shipgate verify") for fix in payload["suggested_fixes"])
+
+
+def test_no_manifest_capability_add_via_check_warns_and_routes_to_detect(
+    tmp_path: Path,
+) -> None:
+    # End-to-end: empty workspace (no shipgate.yaml). build_codex_agent_result
+    # derives no declared surfaces, but the trigger flags the diff in scope.
+    result = build_codex_agent_result(
+        agent="claude-code",
+        workspace=tmp_path,
+        diff_text=_TOOL_SOURCE_DIFF,
+        config=Path("shipgate.yaml"),
+        policy=None,
+    )
+    assert result.decision == "warn"
+    assert result.first_next_action.command.startswith("agents-shipgate detect")
+
+
+def test_capability_add_to_undeclared_surface_warns_when_manifest_declares_other(
+    tmp_path: Path,
+) -> None:
+    # Manifest exists but declares a *different* tool source than the changed
+    # file. The declared-coverage path does not match, so the undeclared path
+    # must catch it (and route to detect, not a verify that never scans it).
+    _write_manifest(
+        tmp_path,
+        "  - id: other\n    type: mcp\n    path: other-tools.json\n    trust: internal\n",
+    )
+    result = build_codex_agent_result(
+        agent="claude-code",
+        workspace=tmp_path,
+        diff_text=_TOOL_SOURCE_DIFF,
+        config=Path("shipgate.yaml"),
+        policy=None,
+    )
+    assert result.decision == "warn"
+    assert result.first_next_action.command.startswith("agents-shipgate detect")
+    payload = result.model_dump(mode="json", exclude_none=True)
+    assert any(d["code"] == "undeclared_capability_surface" for d in payload["diagnostics"])
+
+
+def test_undeclared_gap_never_downgrades_a_block(tmp_path: Path) -> None:
+    # A real boundary block plus an in-scope trigger must stay blocked, not warn.
+    block_diff = (CORPUS / "mcp_auto_approve_write.diff").read_text(encoding="utf-8")
+    result = evaluate_codex_boundary_result(
+        workspace=tmp_path,
+        diff_text=block_diff,
+        agent="claude-code",
+        trigger=_SURFACE_TRIGGER,
+    )
+    assert result.decision == "block"
+
+
+def test_undeclared_gap_inactive_out_of_scope_or_when_release_decision_present(
+    tmp_path: Path,
+) -> None:
+    # Manifest-present force_run is "in scope" overall but is NOT a tool-surface
+    # change, so it must keep the clean allow (the "no noise on docs" property).
+    force_run_only = evaluate_codex_boundary_result(
+        workspace=tmp_path,
+        diff_text=_TOOL_SOURCE_DIFF,
+        agent="claude-code",
+        trigger=_FORCE_RUN_TRIGGER,
+    )
+    assert force_run_only.decision == "allow"
+
+    # No trigger context at all (bare call) preserves legacy behavior.
+    no_trigger = evaluate_codex_boundary_result(
+        workspace=tmp_path,
+        diff_text=_TOOL_SOURCE_DIFF,
+        agent="claude-code",
+    )
+    assert no_trigger.decision == "allow"
+
+    # A supplied release_decision means the full scan already ran; the
+    # projection governs and the boundary-only heuristic stays out of it.
+    scanned = evaluate_codex_boundary_result(
+        workspace=tmp_path,
+        diff_text=_TOOL_SOURCE_DIFF,
+        agent="claude-code",
+        trigger=_SURFACE_TRIGGER,
+        release_decision={"decision": "passed", "reason": "clean"},
+    )
+    assert scanned.decision == "allow"
+
+
 def test_codex_check_reads_diff_from_stdin(tmp_path: Path) -> None:
     diff_text = (CORPUS / "docs_only.diff").read_text(encoding="utf-8")
     result = runner.invoke(

From a721bbae15383e7d725732e9c4783b42946df432 Mon Sep 17 00:00:00 2001
From: Pengfei Hu <pengfei@threemoonslab.com>
Date: Mon, 22 Jun 2026 11:31:43 -0700
Subject: [PATCH 2/3] Classify undeclared tool surfaces per-file (review P1/P2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses two review findings on the boundary coverage-gap defer.

P1 — mixed declared + undeclared diffs only routed to `verify`. The undeclared
gap was gated on `not coverage_surfaces`, making declared and undeclared
mutually exclusive: a diff that changed a declared MCP export *and* an
undeclared `api/openapi.yaml` routed to `verify`, which never scans the
undeclared surface, so the new capability escaped the gate. Undeclared surfaces
are now computed independently, per-file, and take precedence — a mixed diff
routes to `detect` (declare-then-verify), which then covers both.

P2 — `_trigger_in_scope` treated every `run_shipgate` rule as a tool-surface
rule, so `shipgate.yaml` (TRIGGER-SHIPGATE-MANIFEST), `prompts/**`
(TRIGGER-PROMPTS-OR-POLICIES), and the CI gate were mislabeled
`undeclared_capability_surface` and routed to `detect`. Classification now keys
on the actual tool-source rules only (MCP export, OpenAPI, static tool
inventory, Codex plugin, n8n, @function_tool), excluding the manifest,
prompts/policies, CI workflow, and `.codex` boundary config (which the boundary
check itself evaluates).

Implementation: `build_codex_agent_result` classifies each changed file
per-file via the public trigger evaluator (glob rules match the path;
diff_contains rules match the file's added lines), excluding boundary paths and
already-declared files, and passes an explicit `undeclared_capability_surfaces`
list to `evaluate_codex_boundary_result`. The coarse `_trigger_in_scope` +
`is_boundary_path` heuristic is removed in favor of that explicit list.

Tests: added mixed declared+undeclared cases (direct + via `check`) and
manifest/prompts no-false-detect cases. Full suite green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/agents_shipgate/cli/agent_result.py    |  85 ++++++++++-
 src/agents_shipgate/core/codex_boundary.py | 106 +++++--------
 tests/test_codex_boundary_check.py         | 167 +++++++++++++++------
 3 files changed, 243 insertions(+), 115 deletions(-)

diff --git a/src/agents_shipgate/cli/agent_result.py b/src/agents_shipgate/cli/agent_result.py
index 0237164a..4d361ba0 100644
--- a/src/agents_shipgate/cli/agent_result.py
+++ b/src/agents_shipgate/cli/agent_result.py
@@ -11,9 +11,28 @@
     parse_unified_diff,
 )
 from agents_shipgate.schemas.codex_boundary_result import CodexBoundaryResultV1
-from agents_shipgate.triggers import _git_diff_context
+from agents_shipgate.triggers import _git_diff_context, load_triggers
 from agents_shipgate.triggers import evaluate as evaluate_trigger
 
+# Trigger rule IDs that mark a changed file as a *tool/capability source* —
+# the surfaces ``verify`` compiles into the capability delta. Deliberately
+# EXCLUDES the other ``run_shipgate`` rules that are not declarable tool
+# sources: ``TRIGGER-SHIPGATE-MANIFEST`` (the manifest itself),
+# ``TRIGGER-PROMPTS-OR-POLICIES`` (prompts/policies), ``TRIGGER-SHIPGATE-CI-
+# WORKFLOW`` (the gate), and ``TRIGGER-CODEX-BOUNDARY-CONFIG-CHANGED`` (which
+# the boundary check itself evaluates). A change to one of those must not be
+# mislabeled an "undeclared tool surface".
+_TOOL_SOURCE_TRIGGER_IDS = frozenset(
+    {
+        "TRIGGER-MCP-EXPORT-CHANGED",
+        "TRIGGER-OPENAPI-SPEC-CHANGED",
+        "TRIGGER-STATIC-TOOL-INVENTORY-CHANGED",
+        "TRIGGER-CODEX-PLUGIN-CHANGED",
+        "TRIGGER-N8N-WORKFLOW-CHANGED",
+        "TRIGGER-FUNCTION-TOOL-DECORATOR",
+    }
+)
+
 
 def build_codex_agent_result(
     *,
@@ -24,7 +43,8 @@ def build_codex_agent_result(
     policy: Path | None,
 ) -> CodexBoundaryResultV1:
     workspace = workspace.resolve()
-    changed_files = sorted({item.path for item in parse_unified_diff(diff_text) if item.path})
+    diff_files = parse_unified_diff(diff_text)
+    changed_files = sorted({item.path for item in diff_files if item.path})
     config_path = config if config.is_absolute() else workspace / config
     trigger = evaluate_trigger(
         paths=changed_files,
@@ -32,20 +52,75 @@ def build_codex_agent_result(
         manifest_present=config_path.is_file(),
         user_requested=True,
     )
+    declared = _declared_tool_surfaces_changed(
+        workspace=workspace,
+        config_path=config_path,
+        changed_files=changed_files,
+    )
     return evaluate_codex_boundary_result(
         workspace=workspace,
         diff_text=diff_text,
         agent=agent,
         policy_path=policy,
         trigger=trigger,
-        capability_surfaces_changed=_declared_tool_surfaces_changed(
-            workspace=workspace,
-            config_path=config_path,
+        capability_surfaces_changed=declared,
+        undeclared_capability_surfaces=_undeclared_tool_surfaces_changed(
+            diff_files=diff_files,
             changed_files=changed_files,
+            declared=declared,
         ),
     )
 
 
+def _undeclared_tool_surfaces_changed(
+    *,
+    diff_files: list[Any],
+    changed_files: list[str],
+    declared: list[str],
+) -> list[str]:
+    """Changed files that are tool/capability surfaces the manifest does NOT declare.
+
+    ``verify`` only gates *declared* tool sources, so an undeclared surface (a
+    new MCP/OpenAPI/tool-inventory/codex-plugin file, or an SDK/n8n tool the
+    manifest does not list) escapes the gate even though ``check`` returns a
+    clean ``allow``. Each changed file is classified per-file against only the
+    *tool-source* trigger rules (so the manifest, prompts/policies, the CI gate,
+    and ``.codex`` boundary config are never mislabeled), excluding boundary
+    paths (the boundary evaluator already inspects those) and files the manifest
+    already declares (``verify`` gates those). Computed independently of the
+    declared set so a mixed diff — one declared surface plus one undeclared —
+    still surfaces the undeclared one.
+    """
+    if not changed_files:
+        return []
+    declared_set = set(declared)
+    added_by_path = {
+        item.path: "\n".join(getattr(item, "added_lines", []) or [])
+        for item in diff_files
+        if item.path
+    }
+    catalog = load_triggers()
+    undeclared: list[str] = []
+    for path in changed_files:
+        if path in declared_set or is_boundary_path(path):
+            continue
+        # Per-file classification: a glob rule matches on the path, a
+        # diff_contains rule (n8n, @function_tool) on this file's added lines.
+        result = evaluate_trigger(
+            paths=[path],
+            diff_text=added_by_path.get(path, ""),
+            manifest_present=False,
+            user_requested=False,
+            triggers=catalog,
+        )
+        if any(
+            rule.get("id") in _TOOL_SOURCE_TRIGGER_IDS
+            for rule in result.get("matched_rules", [])
+        ):
+            undeclared.append(path)
+    return sorted(dict.fromkeys(undeclared))
+
+
 def _declared_tool_surfaces_changed(
     *,
     workspace: Path,
diff --git a/src/agents_shipgate/core/codex_boundary.py b/src/agents_shipgate/core/codex_boundary.py
index aab63100..62fec0ea 100644
--- a/src/agents_shipgate/core/codex_boundary.py
+++ b/src/agents_shipgate/core/codex_boundary.py
@@ -376,16 +376,24 @@ def evaluate_codex_boundary_result(
     trigger: dict[str, Any] | None = None,
     release_decision: dict[str, Any] | None = None,
     capability_surfaces_changed: list[str] | None = None,
+    undeclared_capability_surfaces: list[str] | None = None,
 ) -> AgentResultV1:
     """Return the local Codex boundary-result projection for a unified diff.
 
-    ``capability_surfaces_changed`` lists changed files that the manifest
-    declares as tool sources. The boundary evaluator does not inspect tool
-    surfaces (only ``verify`` computes the capability delta), so when one
-    changed and the boundary result is otherwise a clean ``allow``, the
-    result is escalated to ``warn`` and routed to ``verify`` rather than
-    green-lighting a capability change ``check`` never evaluated. This keeps
-    ``check`` from disagreeing with the ``release_decision.decision`` gate.
+    The boundary evaluator does not inspect tool surfaces (only ``verify``
+    computes the capability delta), so a clean ``allow`` over a changed tool
+    surface is escalated to ``warn`` rather than green-lighting a capability
+    change ``check`` never evaluated. This keeps ``check`` from disagreeing
+    with the ``release_decision.decision`` gate. Two inputs drive it (both
+    pre-computed by ``build_codex_agent_result``):
+
+    - ``capability_surfaces_changed`` — changed files the manifest DECLARES as
+      tool sources. ``verify`` will gate these, so route to ``verify``.
+    - ``undeclared_capability_surfaces`` — changed files that ARE tool surfaces
+      but the manifest does not declare (or there is no manifest). ``verify``
+      cannot gate an undeclared surface, so route to declare-then-verify
+      (``detect``). Takes precedence when a diff changes both, since ``verify``
+      alone would miss the undeclared one.
     """
 
     # Keep this local diff projector aligned with
@@ -461,32 +469,17 @@ def add(rule_id: str, *, path: str | None, evidence: dict[str, Any]) -> None:
     # that touches a tool/capability surface would silently green-light a
     # capability change that only ``verify`` gates. Escalate to ``warn`` and
     # route onward. Gated on ``release_decision is None`` because a provided
-    # release decision means the full capability scan already ran.
-    #
-    # Two sub-cases:
-    #   * ``coverage_gap`` — the changed surface is one the manifest DECLARES as
-    #     a tool source, so ``verify`` will gate it: route straight to verify.
-    #   * ``undeclared_gap`` — the trigger flags the diff as a tool-surface
-    #     change but the manifest declares no matching source (or there is no
-    #     manifest), so ``verify`` cannot gate it yet: route to declare-then-
-    #     verify instead of a misleading clean ``verify`` over a surface it
-    #     never scans.
-    #
-    # The undeclared case requires a changed file the boundary evaluator does
-    # NOT itself inspect (``not is_boundary_path``): a clean ``allow`` over a
-    # purely Codex-local surface (``.codex/config.toml``, ``AGENTS.md``, the
-    # Shipgate workflow) is authoritative — ``check`` already evaluated it — so
-    # it is not a coverage gap. Same exclusion ``_declared_tool_surfaces_changed``
-    # applies to declared sources.
+    # release decision means the full capability scan already ran. The two
+    # surface lists are classified per-file upstream (declared vs undeclared);
+    # see the docstring. ``undeclared_gap`` is computed independently of the
+    # declared set and takes precedence so a mixed diff routes to
+    # declare-then-verify rather than a ``verify`` that misses the undeclared
+    # surface.
     boundary_clean_allow = decision == "allow" and release_decision is None
     coverage_surfaces = sorted(dict.fromkeys(capability_surfaces_changed or []))
+    undeclared_surfaces = sorted(dict.fromkeys(undeclared_capability_surfaces or []))
     coverage_gap = boundary_clean_allow and bool(coverage_surfaces)
-    undeclared_gap = (
-        boundary_clean_allow
-        and not coverage_surfaces
-        and _trigger_in_scope(trigger)
-        and any(not is_boundary_path(path) for path in changed_files)
-    )
+    undeclared_gap = boundary_clean_allow and bool(undeclared_surfaces)
     if coverage_gap or undeclared_gap:
         decision = "warn"
 
@@ -501,18 +494,18 @@ def add(rule_id: str, *, path: str | None, evidence: dict[str, Any]) -> None:
         finding_fingerprints=finding_fingerprints,
         evaluated_files=evaluated_files,
     )
-    if coverage_gap:
+    if undeclared_gap:
+        first_next_action = _undeclared_next_action()
+        summary = _undeclared_summary(undeclared_surfaces)
+        diagnostics = [*diagnostics, _undeclared_diagnostic(undeclared_surfaces)]
+        trace = [*_trace_for(policy, decision, violations), _undeclared_trace(undeclared_surfaces)]
+        suggested_fixes = [_DETECT_COMMAND, _VERIFY_COMMAND]
+    elif coverage_gap:
         first_next_action = _coverage_next_action()
         summary = _coverage_summary(coverage_surfaces)
         diagnostics = [*diagnostics, _coverage_diagnostic(coverage_surfaces)]
         trace = [*_trace_for(policy, decision, violations), _coverage_trace(coverage_surfaces)]
         suggested_fixes = [_VERIFY_COMMAND]
-    elif undeclared_gap:
-        first_next_action = _undeclared_next_action()
-        summary = _undeclared_summary()
-        diagnostics = [*diagnostics, _undeclared_diagnostic()]
-        trace = [*_trace_for(policy, decision, violations), _undeclared_trace()]
-        suggested_fixes = [_DETECT_COMMAND, _VERIFY_COMMAND]
     else:
         first_next_action = _next_action_for(decision, violations, repair)
         summary = _summary_for(decision, violations)
@@ -1370,26 +1363,6 @@ def _risk_for(violations: list[AgentResultViolatedRule]) -> AgentResultRiskLevel
 _DETECT_COMMAND = "agents-shipgate detect --json"
 
 
-def _trigger_in_scope(trigger: dict[str, Any] | None) -> bool:
-    """True when a CONTENT-driven trigger rule flags a tool/capability-surface change.
-
-    Keys on a matched rule whose ``action`` is ``run_shipgate`` — the
-    path/diff-driven tool-surface rules (MCP export, tool decorator, OpenAPI,
-    framework bump). Deliberately NOT the top-level ``run_shipgate`` boolean:
-    that is also ``True`` for the blanket ``force_run`` rule
-    (``TRIGGER-EXISTING-MANIFEST-PRESENT``), which fires on every diff in an
-    opted-in repo and would turn a docs-only edit into a coverage warn. ``None``
-    (a bare function call with no trigger context) is out of scope, so the
-    boundary evaluator keeps its prior behavior when called directly.
-    """
-    if not isinstance(trigger, dict):
-        return False
-    return any(
-        isinstance(rule, dict) and rule.get("action") == "run_shipgate"
-        for rule in trigger.get("matched_rules") or []
-    )
-
-
 def _undeclared_next_action() -> AgentResultNextAction:
     return AgentResultNextAction(
         actor="coding_agent",
@@ -1403,31 +1376,32 @@ def _undeclared_next_action() -> AgentResultNextAction:
     )
 
 
-def _undeclared_summary() -> str:
+def _undeclared_summary(surfaces: list[str]) -> str:
     return (
         "No Codex boundary rule fired, but the diff changes a tool/capability surface "
-        "that shipgate.yaml does not declare, so verify cannot gate it yet. Declare it "
-        "(detect or tool_sources) and run verify before reporting completion."
+        f"({', '.join(surfaces[:5])}) that shipgate.yaml does not declare, so verify "
+        "cannot gate it yet. Declare it (detect or tool_sources) and run verify before "
+        "reporting completion."
     )
 
 
-def _undeclared_diagnostic() -> AgentResultDiagnostic:
+def _undeclared_diagnostic(surfaces: list[str]) -> AgentResultDiagnostic:
     return AgentResultDiagnostic(
         level="warning",
         code="undeclared_capability_surface",
         message=(
-            "A changed tool/capability surface is not declared in shipgate.yaml; "
-            "check is boundary-only and verify only gates declared surfaces. Declare "
-            "the surface, then verify."
+            "Undeclared tool/capability surface(s) "
+            f"{', '.join(surfaces[:5])} changed; check is boundary-only and verify "
+            "only gates declared surfaces. Declare the surface, then verify."
         ),
     )
 
 
-def _undeclared_trace() -> AgentResultTraceEvent:
+def _undeclared_trace(surfaces: list[str]) -> AgentResultTraceEvent:
     return AgentResultTraceEvent(
         step="coverage",
         summary=(
-            "boundary_only: in-scope diff changes an undeclared tool surface; "
+            f"boundary_only: {len(surfaces)} undeclared tool surface(s) changed; "
             "routed to detect + verify."
         ),
     )
diff --git a/tests/test_codex_boundary_check.py b/tests/test_codex_boundary_check.py
index 505aba20..4c025eeb 100644
--- a/tests/test_codex_boundary_check.py
+++ b/tests/test_codex_boundary_check.py
@@ -246,38 +246,29 @@ def test_check_does_not_warn_on_docs_change_in_opted_in_repo(tmp_path: Path) ->
     assert result.decision == "allow"
 
 
-# --- Undeclared coverage gap: the trigger flags a tool-surface change the -----
-# manifest does not declare (or there is no manifest). verify cannot gate an
-# undeclared surface, so route to declare-then-verify rather than a clean allow.
-
-# A trigger context where a content-driven tool-surface rule fired (what the
-# real trigger emits for an MCP/tool-export change), distinct from the blanket
-# manifest-present force_run rule.
-_SURFACE_TRIGGER = {
-    "run_shipgate": True,
-    "matched_rules": [
-        {"id": "TRIGGER-MCP-EXPORT-CHANGED", "action": "run_shipgate"}
-    ],
-}
-# Manifest-present force_run only: in scope for "run shipgate" but NOT a
-# tool-surface change, so it must not trip the undeclared coverage warn.
-_FORCE_RUN_TRIGGER = {
-    "run_shipgate": True,
-    "matched_rules": [
-        {"id": "TRIGGER-EXISTING-MANIFEST-PRESENT", "action": "force_run"}
-    ],
-}
+# --- Undeclared coverage gap: a changed file IS a tool surface but the --------
+# manifest does not declare it (or there is no manifest). verify only gates
+# declared surfaces, so route to declare-then-verify (detect) rather than a
+# clean allow or a verify that never scans it.
+
+# A second changed file that is an *undeclared* tool surface (an OpenAPI spec),
+# used to exercise mixed declared+undeclared diffs (review finding P1).
+_MIXED_TOOL_SOURCE_DIFF = _TOOL_SOURCE_DIFF + (
+    "diff --git a/api/openapi.yaml b/api/openapi.yaml\n"
+    "--- a/api/openapi.yaml\n"
+    "+++ b/api/openapi.yaml\n"
+    "@@ -1 +1,2 @@\n"
+    " openapi: 3.0.0\n"
+    "+paths: {}\n"
+)
 
 
-def test_undeclared_tool_surface_in_scope_trigger_warns_and_routes_to_detect(
-    tmp_path: Path,
-) -> None:
+def test_undeclared_surface_warns_and_routes_to_detect(tmp_path: Path) -> None:
     result = evaluate_codex_boundary_result(
         workspace=tmp_path,
         diff_text=_TOOL_SOURCE_DIFF,
         agent="claude-code",
-        trigger=_SURFACE_TRIGGER,
-        capability_surfaces_changed=None,
+        undeclared_capability_surfaces=["mcp-tools.json"],
     )
     payload = result.model_dump(mode="json", exclude_none=True)
     _validate(payload)
@@ -293,11 +284,30 @@ def test_undeclared_tool_surface_in_scope_trigger_warns_and_routes_to_detect(
     assert any(fix.startswith("agents-shipgate verify") for fix in payload["suggested_fixes"])
 
 
+def test_mixed_declared_and_undeclared_routes_to_detect(tmp_path: Path) -> None:
+    # Review finding P1: a diff that changes BOTH a declared surface (verify
+    # gates it) and an undeclared one (verify does not) must route to detect —
+    # declare-then-verify — not a verify that silently misses the undeclared
+    # surface. Undeclared takes precedence over the declared coverage gap.
+    result = evaluate_codex_boundary_result(
+        workspace=tmp_path,
+        diff_text=_MIXED_TOOL_SOURCE_DIFF,
+        agent="claude-code",
+        capability_surfaces_changed=["mcp-tools.json"],
+        undeclared_capability_surfaces=["api/openapi.yaml"],
+    )
+    assert result.decision == "warn"
+    assert result.first_next_action.command.startswith("agents-shipgate detect")
+    payload = result.model_dump(mode="json", exclude_none=True)
+    diag = next(d for d in payload["diagnostics"] if d["code"] == "undeclared_capability_surface")
+    assert "api/openapi.yaml" in diag["message"]
+
+
 def test_no_manifest_capability_add_via_check_warns_and_routes_to_detect(
     tmp_path: Path,
 ) -> None:
     # End-to-end: empty workspace (no shipgate.yaml). build_codex_agent_result
-    # derives no declared surfaces, but the trigger flags the diff in scope.
+    # classifies mcp-tools.json as an undeclared tool surface.
     result = build_codex_agent_result(
         agent="claude-code",
         workspace=tmp_path,
@@ -332,38 +342,107 @@ def test_capability_add_to_undeclared_surface_warns_when_manifest_declares_other
     assert any(d["code"] == "undeclared_capability_surface" for d in payload["diagnostics"])
 
 
+def test_mixed_declared_and_undeclared_via_check_routes_to_detect(
+    tmp_path: Path,
+) -> None:
+    # Review finding P1, end-to-end through build_codex_agent_result: manifest
+    # declares mcp-tools.json; the diff also adds an undeclared OpenAPI spec.
+    _write_manifest(
+        tmp_path,
+        "  - id: mcp\n    type: mcp\n    path: mcp-tools.json\n    trust: internal\n",
+    )
+    result = build_codex_agent_result(
+        agent="claude-code",
+        workspace=tmp_path,
+        diff_text=_MIXED_TOOL_SOURCE_DIFF,
+        config=Path("shipgate.yaml"),
+        policy=None,
+    )
+    assert result.decision == "warn"
+    assert result.first_next_action.command.startswith("agents-shipgate detect")
+    payload = result.model_dump(mode="json", exclude_none=True)
+    diag = next(d for d in payload["diagnostics"] if d["code"] == "undeclared_capability_surface")
+    assert "api/openapi.yaml" in diag["message"]
+
+
+def test_manifest_edit_is_not_an_undeclared_tool_surface(tmp_path: Path) -> None:
+    # Review finding P2: editing shipgate.yaml in an opted-in repo fires a
+    # run_shipgate trigger rule (TRIGGER-SHIPGATE-MANIFEST) but is NOT a
+    # declarable tool source, so it must not be reported as an undeclared
+    # surface routed to detect.
+    _write_manifest(
+        tmp_path,
+        "  - id: mcp\n    type: mcp\n    path: mcp-tools.json\n    trust: internal\n",
+    )
+    diff = (
+        "diff --git a/shipgate.yaml b/shipgate.yaml\n"
+        "--- a/shipgate.yaml\n"
+        "+++ b/shipgate.yaml\n"
+        "@@ -1 +1,2 @@\n"
+        ' version: "0.1"\n'
+        "+# touch\n"
+    )
+    result = build_codex_agent_result(
+        agent="claude-code",
+        workspace=tmp_path,
+        diff_text=diff,
+        config=Path("shipgate.yaml"),
+        policy=None,
+    )
+    assert result.decision == "allow"
+    payload = result.model_dump(mode="json", exclude_none=True)
+    assert not any(
+        d["code"] == "undeclared_capability_surface" for d in payload.get("diagnostics", [])
+    )
+
+
+def test_prompts_edit_is_not_an_undeclared_tool_surface(tmp_path: Path) -> None:
+    # Review finding P2: a prompts/ edit fires TRIGGER-PROMPTS-OR-POLICIES but
+    # is not a declarable tool source — no undeclared-surface warn / detect.
+    _write_manifest(
+        tmp_path,
+        "  - id: mcp\n    type: mcp\n    path: mcp-tools.json\n    trust: internal\n",
+    )
+    diff = (
+        "diff --git a/prompts/system.md b/prompts/system.md\n"
+        "--- a/prompts/system.md\n"
+        "+++ b/prompts/system.md\n"
+        "@@ -1 +1,2 @@\n"
+        " You are helpful.\n"
+        "+Be concise.\n"
+    )
+    result = build_codex_agent_result(
+        agent="claude-code",
+        workspace=tmp_path,
+        diff_text=diff,
+        config=Path("shipgate.yaml"),
+        policy=None,
+    )
+    assert result.decision == "allow"
+
+
 def test_undeclared_gap_never_downgrades_a_block(tmp_path: Path) -> None:
-    # A real boundary block plus an in-scope trigger must stay blocked, not warn.
+    # A real boundary block plus an undeclared surface must stay blocked.
     block_diff = (CORPUS / "mcp_auto_approve_write.diff").read_text(encoding="utf-8")
     result = evaluate_codex_boundary_result(
         workspace=tmp_path,
         diff_text=block_diff,
         agent="claude-code",
-        trigger=_SURFACE_TRIGGER,
+        undeclared_capability_surfaces=["mcp-tools.json"],
     )
     assert result.decision == "block"
 
 
-def test_undeclared_gap_inactive_out_of_scope_or_when_release_decision_present(
+def test_undeclared_gap_inactive_without_signal_or_when_release_decision_present(
     tmp_path: Path,
 ) -> None:
-    # Manifest-present force_run is "in scope" overall but is NOT a tool-surface
-    # change, so it must keep the clean allow (the "no noise on docs" property).
-    force_run_only = evaluate_codex_boundary_result(
-        workspace=tmp_path,
-        diff_text=_TOOL_SOURCE_DIFF,
-        agent="claude-code",
-        trigger=_FORCE_RUN_TRIGGER,
-    )
-    assert force_run_only.decision == "allow"
-
-    # No trigger context at all (bare call) preserves legacy behavior.
-    no_trigger = evaluate_codex_boundary_result(
+    # No undeclared surfaces supplied (bare call) preserves the clean allow.
+    no_signal = evaluate_codex_boundary_result(
         workspace=tmp_path,
         diff_text=_TOOL_SOURCE_DIFF,
         agent="claude-code",
     )
-    assert no_trigger.decision == "allow"
+    assert no_signal.decision == "allow"
 
     # A supplied release_decision means the full scan already ran; the
     # projection governs and the boundary-only heuristic stays out of it.
@@ -371,7 +450,7 @@ def test_undeclared_gap_inactive_out_of_scope_or_when_release_decision_present(
         workspace=tmp_path,
         diff_text=_TOOL_SOURCE_DIFF,
         agent="claude-code",
-        trigger=_SURFACE_TRIGGER,
+        undeclared_capability_surfaces=["mcp-tools.json"],
         release_decision={"decision": "passed", "reason": "clean"},
     )
     assert scanned.decision == "allow"

From 9528237d389eeac987fe73054e363bbf1a728dcd Mon Sep 17 00:00:00 2001
From: Pengfei Hu <pengfei@threemoonslab.com>
Date: Mon, 22 Jun 2026 22:51:34 -0700
Subject: [PATCH 3/3] Respect the trigger's skip verdict in undeclared-surface
 classification
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`_undeclared_tool_surfaces_changed` treated any matched tool-source rule as a
tool surface, ignoring the evaluator's winning verdict. A docs or test file
that incidentally mentions `@tool` / `@function_tool` matches
`TRIGGER-FUNCTION-TOOL-DECORATOR` but ALSO `TRIGGER-DOCS-ONLY-NEGATIVE`, which
beats `run_shipgate` — so the catalog skips it. The classifier nonetheless
flagged `README.md` / `tests/test_*.py` as an `undeclared_capability_surface`
and routed to `detect`, contradicting the trigger catalog
(docs/triggers.json: docs/tests that incidentally mention `@tool` must not run).

Require the per-file trigger to actually run (`run_shipgate` true) in addition
to a tool-source rule appearing in `matched_rules`. A real `@function_tool`
source (run_shipgate true) still classifies; a docs/test mention (skip) no
longer does.

Tests: docs file and tests/ file that mention the decorator stay `allow`.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/agents_shipgate/cli/agent_result.py |  7 ++-
 tests/test_codex_boundary_check.py      | 59 +++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/src/agents_shipgate/cli/agent_result.py b/src/agents_shipgate/cli/agent_result.py
index 4d361ba0..d109f5eb 100644
--- a/src/agents_shipgate/cli/agent_result.py
+++ b/src/agents_shipgate/cli/agent_result.py
@@ -113,7 +113,12 @@ def _undeclared_tool_surfaces_changed(
             user_requested=False,
             triggers=catalog,
         )
-        if any(
+        # Require the evaluator's WINNING verdict, not just a matched rule: a
+        # docs/test file that incidentally mentions ``@tool`` also matches
+        # ``TRIGGER-DOCS-ONLY-NEGATIVE``, which beats ``run_shipgate`` so the
+        # catalog skips it. Only treat the file as a tool surface when the
+        # trigger actually runs AND a tool-source rule is what carried it.
+        if result.get("run_shipgate") and any(
             rule.get("id") in _TOOL_SOURCE_TRIGGER_IDS
             for rule in result.get("matched_rules", [])
         ):
diff --git a/tests/test_codex_boundary_check.py b/tests/test_codex_boundary_check.py
index 4c025eeb..f6e0786a 100644
--- a/tests/test_codex_boundary_check.py
+++ b/tests/test_codex_boundary_check.py
@@ -421,6 +421,65 @@ def test_prompts_edit_is_not_an_undeclared_tool_surface(tmp_path: Path) -> None:
     assert result.decision == "allow"
 
 
+def test_docs_file_mentioning_tool_decorator_is_not_undeclared_surface(
+    tmp_path: Path,
+) -> None:
+    # Review finding: a docs file that incidentally mentions @tool matches the
+    # FUNCTION-TOOL-DECORATOR rule but ALSO TRIGGER-DOCS-ONLY-NEGATIVE, which
+    # wins — the catalog skips it, so it must not be flagged as an undeclared
+    # tool surface routed to detect.
+    _write_manifest(
+        tmp_path,
+        "  - id: mcp\n    type: mcp\n    path: mcp-tools.json\n    trust: internal\n",
+    )
+    diff = (
+        "diff --git a/README.md b/README.md\n"
+        "--- a/README.md\n"
+        "+++ b/README.md\n"
+        "@@ -1 +1,2 @@\n"
+        " # Docs\n"
+        "+Use the @tool / @function_tool decorator to register a tool.\n"
+    )
+    result = build_codex_agent_result(
+        agent="claude-code",
+        workspace=tmp_path,
+        diff_text=diff,
+        config=Path("shipgate.yaml"),
+        policy=None,
+    )
+    assert result.decision == "allow"
+    payload = result.model_dump(mode="json", exclude_none=True)
+    assert not any(
+        d["code"] == "undeclared_capability_surface" for d in payload.get("diagnostics", [])
+    )
+
+
+def test_test_file_mentioning_tool_decorator_is_not_undeclared_surface(
+    tmp_path: Path,
+) -> None:
+    # Same property for a tests/ file: docs-only-negative covers tests/**.
+    _write_manifest(
+        tmp_path,
+        "  - id: mcp\n    type: mcp\n    path: mcp-tools.json\n    trust: internal\n",
+    )
+    diff = (
+        "diff --git a/tests/test_docs.py b/tests/test_docs.py\n"
+        "--- a/tests/test_docs.py\n"
+        "+++ b/tests/test_docs.py\n"
+        "@@ -1 +1,2 @@\n"
+        " def test_x():\n"
+        "+    # documents the @function_tool example\n"
+    )
+    result = build_codex_agent_result(
+        agent="claude-code",
+        workspace=tmp_path,
+        diff_text=diff,
+        config=Path("shipgate.yaml"),
+        policy=None,
+    )
+    assert result.decision == "allow"
+
+
 def test_undeclared_gap_never_downgrades_a_block(tmp_path: Path) -> None:
     # A real boundary block plus an undeclared surface must stay blocked.
     block_diff = (CORPUS / "mcp_auto_approve_write.diff").read_text(encoding="utf-8")