ThreeMoonsLab · pengfei-threemoonslab · Jun 23, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 23, 2026
diff --git a/src/agents_shipgate/cli/agent_result.py b/src/agents_shipgate/cli/agent_result.py
@@ -11,9 +11,28 @@
     parse_unified_diff,
 )
 from agents_shipgate.schemas.codex_boundary_result import CodexBoundaryResultV1
-from agents_shipgate.triggers import _git_diff_context
+from agents_shipgate.triggers import _git_diff_context, load_triggers
 from agents_shipgate.triggers import evaluate as evaluate_trigger
 
+# Trigger rule IDs that mark a changed file as a *tool/capability source* —
+# the surfaces ``verify`` compiles into the capability delta. Deliberately
+# EXCLUDES the other ``run_shipgate`` rules that are not declarable tool
+# sources: ``TRIGGER-SHIPGATE-MANIFEST`` (the manifest itself),
+# ``TRIGGER-PROMPTS-OR-POLICIES`` (prompts/policies),
+# ``TRIGGER-SHIPGATE-CI-WORKFLOW`` (the gate), and
+# ``TRIGGER-CODEX-BOUNDARY-CONFIG-CHANGED`` (which the boundary check itself
+# evaluates). None of those may be mislabeled as an "undeclared tool surface".
+_TOOL_SOURCE_TRIGGER_IDS = frozenset(
+    {
+        "TRIGGER-MCP-EXPORT-CHANGED",
+        "TRIGGER-OPENAPI-SPEC-CHANGED",
+        "TRIGGER-STATIC-TOOL-INVENTORY-CHANGED",
+        "TRIGGER-CODEX-PLUGIN-CHANGED",
+        "TRIGGER-N8N-WORKFLOW-CHANGED",
+        "TRIGGER-FUNCTION-TOOL-DECORATOR",
+    }
+)
+
 
 def build_codex_agent_result(
     *,
@@ -24,28 +43,89 @@ def build_codex_agent_result(
     policy: Path | None,
 ) -> CodexBoundaryResultV1:
     workspace = workspace.resolve()
-    changed_files = sorted({item.path for item in parse_unified_diff(diff_text) if item.path})
+    diff_files = parse_unified_diff(diff_text)
+    changed_files = sorted({item.path for item in diff_files if item.path})
     config_path = config if config.is_absolute() else workspace / config
     trigger = evaluate_trigger(
         paths=changed_files,
         diff_text=diff_text,
         manifest_present=config_path.is_file(),
         user_requested=True,
     )
+    declared = _declared_tool_surfaces_changed(
+        workspace=workspace,
+        config_path=config_path,
+        changed_files=changed_files,
+    )
     return evaluate_codex_boundary_result(
         workspace=workspace,
         diff_text=diff_text,
         agent=agent,
         policy_path=policy,
         trigger=trigger,
-        capability_surfaces_changed=_declared_tool_surfaces_changed(
-            workspace=workspace,
-            config_path=config_path,
+        capability_surfaces_changed=declared,
+        undeclared_capability_surfaces=_undeclared_tool_surfaces_changed(
+            diff_files=diff_files,
             changed_files=changed_files,
+            declared=declared,
         ),
     )
 
 
+def _undeclared_tool_surfaces_changed(
+    *,
+    diff_files: list[Any],
+    changed_files: list[str],
+    declared: list[str],
+) -> list[str]:
+    """Changed files that are tool/capability surfaces the manifest does NOT declare.
+
+    ``verify`` only gates *declared* tool sources, so an undeclared surface (a
+    new MCP/OpenAPI/tool-inventory/codex-plugin file, or an SDK/n8n tool the
+    manifest does not list) escapes the gate even though ``check`` returns a
+    clean ``allow``. Each changed file is classified per-file against only the
+    *tool-source* trigger rules (so the manifest, prompts/policies, the CI gate,
+    and ``.codex`` boundary config are never mislabeled), excluding boundary
+    paths (the boundary evaluator already inspects those) and files the manifest
+    already declares (``verify`` gates those). Runs whether or not a declared
+    surface also changed, so a mixed diff — one declared surface plus one
+    undeclared — still surfaces the undeclared one.
+    """
+    if not changed_files:
+        return []
+    declared_set = set(declared)
+    added_by_path = {
+        item.path: "\n".join(getattr(item, "added_lines", []) or [])
+        for item in diff_files
+        if item.path
+    }
+    catalog = load_triggers()
+    undeclared: list[str] = []
+    for path in changed_files:
+        if path in declared_set or is_boundary_path(path):
+            continue
+        # Per-file classification: a glob rule matches on the path, a
+        # diff_contains rule (n8n, @function_tool) on this file's added lines.
+        result = evaluate_trigger(
+            paths=[path],
+            diff_text=added_by_path.get(path, ""),
+            manifest_present=False,
+            user_requested=False,
+            triggers=catalog,
+        )
+        # Require the evaluator's WINNING verdict, not just a matched rule: a
+        # docs/test file that incidentally mentions ``@tool`` also matches
+        # ``TRIGGER-DOCS-ONLY-NEGATIVE``, which beats ``run_shipgate`` so the
+        # catalog skips it. Only treat the file as a tool surface when the
+        # trigger actually runs AND a tool-source rule is what carried it.
+        if result.get("run_shipgate") and any(
+            rule.get("id") in _TOOL_SOURCE_TRIGGER_IDS
+            for rule in result.get("matched_rules", [])
+        ):
+            undeclared.append(path)
+    return sorted(dict.fromkeys(undeclared))
+
+
 def _declared_tool_surfaces_changed(
     *,
     workspace: Path,

diff --git a/src/agents_shipgate/core/codex_boundary.py b/src/agents_shipgate/core/codex_boundary.py
@@ -376,16 +376,24 @@ def evaluate_codex_boundary_result(
     trigger: dict[str, Any] | None = None,
     release_decision: dict[str, Any] | None = None,
     capability_surfaces_changed: list[str] | None = None,
+    undeclared_capability_surfaces: list[str] | None = None,
 ) -> AgentResultV1:
     """Return the local Codex boundary-result projection for a unified diff.
 
-    ``capability_surfaces_changed`` lists changed files that the manifest
-    declares as tool sources. The boundary evaluator does not inspect tool
-    surfaces (only ``verify`` computes the capability delta), so when one
-    changed and the boundary result is otherwise a clean ``allow``, the
-    result is escalated to ``warn`` and routed to ``verify`` rather than
-    green-lighting a capability change ``check`` never evaluated. This keeps
-    ``check`` from disagreeing with the ``release_decision.decision`` gate.
+    The boundary evaluator does not inspect tool surfaces (only ``verify``
+    computes the capability delta), so a clean ``allow`` over a changed tool
+    surface is escalated to ``warn`` rather than green-lighting a capability
+    change ``check`` never evaluated. This keeps ``check`` from disagreeing
+    with the ``release_decision.decision`` gate. Two inputs drive it (both
+    pre-computed by ``build_codex_agent_result``):
+
+    - ``capability_surfaces_changed`` — changed files the manifest DECLARES as
+      tool sources. ``verify`` will gate these, so route to ``verify``.
+    - ``undeclared_capability_surfaces`` — changed files that ARE tool surfaces
+      but which the manifest does not declare (or no manifest exists).
+      ``verify`` cannot gate an undeclared surface, so route to
+      declare-then-verify (``detect``). Takes precedence when a diff changes
+      both, since ``verify`` alone would miss the undeclared one.
     """
 
     # Keep this local diff projector aligned with
@@ -458,13 +466,21 @@ def add(rule_id: str, *, path: str | None, evidence: dict[str, Any]) -> None:
     decision = _decision_for(violations, release_decision=release_decision)
 
     # Coverage gap: check is boundary-only, so a clean ``allow`` over a diff
-    # that touches a manifest-declared tool surface would silently green-light
-    # a capability change that only ``verify`` gates. Escalate to ``warn`` and
-    # route to verify instead. Gated on ``release_decision is None`` because a
-    # provided release decision means the full capability scan already ran.
+    # that touches a tool/capability surface would silently green-light a
+    # capability change that only ``verify`` gates. Escalate to ``warn`` and
+    # route onward. Gated on ``release_decision is None`` because a provided
+    # release decision means the full capability scan already ran. The two
+    # surface lists are classified per-file upstream (declared vs undeclared);
+    # see the docstring. ``undeclared_gap`` is computed independently of the
+    # declared set and takes precedence so a mixed diff routes to
+    # declare-then-verify rather than a ``verify`` that misses the undeclared
+    # surface.
+    boundary_clean_allow = decision == "allow" and release_decision is None
     coverage_surfaces = sorted(dict.fromkeys(capability_surfaces_changed or []))
-    coverage_gap = decision == "allow" and release_decision is None and bool(coverage_surfaces)
-    if coverage_gap:
+    undeclared_surfaces = sorted(dict.fromkeys(undeclared_capability_surfaces or []))
+    coverage_gap = boundary_clean_allow and bool(coverage_surfaces)
+    undeclared_gap = boundary_clean_allow and bool(undeclared_surfaces)
+    if coverage_gap or undeclared_gap:
         decision = "warn"
 
     risk_level = _risk_for(violations)
@@ -478,7 +494,13 @@ def add(rule_id: str, *, path: str | None, evidence: dict[str, Any]) -> None:
         finding_fingerprints=finding_fingerprints,
         evaluated_files=evaluated_files,
     )
-    if coverage_gap:
+    if undeclared_gap:
+        first_next_action = _undeclared_next_action()
+        summary = _undeclared_summary(undeclared_surfaces)
+        diagnostics = [*diagnostics, _undeclared_diagnostic(undeclared_surfaces)]
+        trace = [*_trace_for(policy, decision, violations), _undeclared_trace(undeclared_surfaces)]
+        suggested_fixes = [_DETECT_COMMAND, _VERIFY_COMMAND]
+    elif coverage_gap:
         first_next_action = _coverage_next_action()
         summary = _coverage_summary(coverage_surfaces)
         diagnostics = [*diagnostics, _coverage_diagnostic(coverage_surfaces)]
@@ -1338,6 +1360,51 @@ def _risk_for(violations: list[AgentResultViolatedRule]) -> AgentResultRiskLevel
 # auto-detects the base (v0.13) and emits the boundary-result surface, so it
 # works for both the local working tree and committed refs.
 _VERIFY_COMMAND = "agents-shipgate verify --json"
+_DETECT_COMMAND = "agents-shipgate detect --json"
+
+
+def _undeclared_next_action() -> AgentResultNextAction:
+    return AgentResultNextAction(
+        actor="coding_agent",
+        kind="warn",
+        command=_DETECT_COMMAND,
+        why=(
+            "This diff changes a tool/capability surface that shipgate.yaml does not "
+            "declare, so neither check nor verify gates it yet. Declare the surface "
+            "(run detect or add it to tool_sources), then run verify before completing."
+        ),
+    )
+
+
+def _undeclared_summary(surfaces: list[str]) -> str:
+    return (
+        "No Codex boundary rule fired, but the diff changes a tool/capability surface "
+        f"({', '.join(surfaces[:5])}) that shipgate.yaml does not declare, so verify "
+        "cannot gate it yet. Declare it (detect or tool_sources) and run verify before "
+        "reporting completion."
+    )
+
+
+def _undeclared_diagnostic(surfaces: list[str]) -> AgentResultDiagnostic:
+    return AgentResultDiagnostic(
+        level="warning",
+        code="undeclared_capability_surface",
+        message=(
+            "Undeclared tool/capability surface(s) "
+            f"{', '.join(surfaces[:5])} changed; check is boundary-only and verify "
+            "only gates declared surfaces. Declare the surface, then verify."
+        ),
+    )
+
+
+def _undeclared_trace(surfaces: list[str]) -> AgentResultTraceEvent:
+    return AgentResultTraceEvent(
+        step="coverage",
+        summary=(
+            f"boundary_only: {len(surfaces)} undeclared tool surface(s) changed; "
+            "routed to detect + verify."
+        ),
+    )
 
 
 def _coverage_next_action() -> AgentResultNextAction: