From 279e53e123665499819044baca278c34f4ccf66c Mon Sep 17 00:00:00 2001
From: Bryan Bednarski <bbednarski@nvidia.com>
Date: Wed, 1 Jul 2026 07:48:16 -0600
Subject: [PATCH 1/3] docs: document LLM request intercept outcomes

Signed-off-by: Bryan Bednarski <bbednarski@nvidia.com>
---
 docs/build-plugins/code-examples.mdx          |  7 +-
 docs/build-plugins/register-behavior.mdx      | 10 +-
 .../instrument-applications/code-examples.mdx | 12 +--
 .../code-examples.mdx                         |  9 +-
 .../provider-codecs.mdx                       | 13 ++-
 .../llm-request-intercept-outcomes.mdx        | 97 +++++++++++++++++++
 6 files changed, 131 insertions(+), 17 deletions(-)
 create mode 100644 docs/reference/llm-request-intercept-outcomes.mdx
diff --git a/docs/build-plugins/code-examples.mdx b/docs/build-plugins/code-examples.mdx
index f2114c2cf..068e33f1d 100644
--- a/docs/build-plugins/code-examples.mdx
+++ b/docs/build-plugins/code-examples.mdx
@@ -37,11 +37,14 @@ class HeaderPlugin:
             name: str,
             request: nemo_relay.LLMRequest,
             annotated: nemo_relay.AnnotatedLLMRequest | None
-        ) -> tuple[nemo_relay.LLMRequest, nemo_relay.AnnotatedLLMRequest | None]:
+        ) -> nemo_relay.LLMRequestInterceptOutcome:
             # The request object is immutable, however we can return a new instance with updated headers.
             headers = request.headers.copy()
             headers[plugin_config["header_name"]] = plugin_config["value"]
-            return nemo_relay.LLMRequest(headers=headers, content=request.content), annotated
+            return nemo_relay.LLMRequestInterceptOutcome(
+                nemo_relay.LLMRequest(headers=headers, content=request.content),
+                annotated,
+            )
 
         context.register_llm_request_intercept("inject-header", 100, False, add_header)
 
diff --git a/docs/build-plugins/register-behavior.mdx b/docs/build-plugins/register-behavior.mdx
index d995ca773..82f01803f 100644
--- a/docs/build-plugins/register-behavior.mdx
+++ b/docs/build-plugins/register-behavior.mdx
@@ -51,10 +51,13 @@ class HeaderPlugin:
             name: str,
             request: nemo_relay.LLMRequest,
             annotated: nemo_relay.AnnotatedLLMRequest | None
-        ) -> tuple[nemo_relay.LLMRequest, nemo_relay.AnnotatedLLMRequest | None]:
+        ) -> nemo_relay.LLMRequestInterceptOutcome:
             headers = request.headers.copy()
             headers[plugin_config["header_name"]] = plugin_config["value"]
-            return nemo_relay.LLMRequest(headers=headers, content=request.content), annotated
+            return nemo_relay.LLMRequestInterceptOutcome(
+                nemo_relay.LLMRequest(headers=headers, content=request.content),
+                annotated,
+            )
 
         context.register_llm_request_intercept("inject-header", 100, False, add_header)
 
@@ -99,6 +102,7 @@ plugin.register('header-plugin', headerPlugin);
 
 <Tab title="Rust" language="rust">
 ```rust
+use nemo_relay::api::llm::LlmRequestInterceptOutcome;
 use nemo_relay::plugin::{
     register_plugin, ConfigDiagnostic, DiagnosticLevel, Plugin, PluginRegistrationContext,
     Result as PluginResult,
@@ -166,7 +170,7 @@ impl Plugin for HeaderPlugin {
                     request
                         .headers
                         .insert(header_name.clone(), header_value.clone().into());
-                    Ok((request, annotated))
+                    Ok(LlmRequestInterceptOutcome::new(request, annotated))
                 }),
             )?;
             Ok(())
diff --git a/docs/instrument-applications/code-examples.mdx b/docs/instrument-applications/code-examples.mdx
index 64d687da3..81a9d4407 100644
--- a/docs/instrument-applications/code-examples.mdx
+++ b/docs/instrument-applications/code-examples.mdx
@@ -260,8 +260,8 @@ tool_args = nemo_relay.tools.request_intercepts("search", {"query": "weather"})
 nemo_relay.tools.conditional_execution("search", tool_args)
 
 llm_request = LLMRequest({}, {"messages": [{"role": "user", "content": "hello"}]})
-llm_request = nemo_relay.llm.request_intercepts("demo-provider", llm_request)
-nemo_relay.llm.conditional_execution(llm_request)
+outcome = nemo_relay.llm.request_intercepts("demo-provider", llm_request)
+nemo_relay.llm.conditional_execution(outcome.request)
 ```
 </Tab>
 
@@ -279,8 +279,8 @@ const toolArgs = await toolRequestIntercepts('search', { query: 'weather' });
 await toolConditionalExecution('search', toolArgs);
 
 const request = new LlmRequest({}, { messages: [{ role: 'user', content: 'hello' }] });
-const rewritten = await llmRequestIntercepts('demo-provider', request);
-await llmConditionalExecution(rewritten);
+const outcome = await llmRequestIntercepts('demo-provider', request);
+await llmConditionalExecution(outcome.request);
 ```
 </Tab>
 
@@ -297,8 +297,8 @@ let request = LlmRequest {
     headers: Default::default(),
     content: json!({"messages": [{"role": "user", "content": "hello"}]}),
 };
-let rewritten = llm_request_intercepts("demo-provider", request)?;
-llm_conditional_execution(&rewritten)?;
+let outcome = llm_request_intercepts("demo-provider", request)?;
+llm_conditional_execution(&outcome.request)?;
 ```
 </Tab>
 
diff --git a/docs/integrate-into-frameworks/code-examples.mdx b/docs/integrate-into-frameworks/code-examples.mdx
index d989191fa..ffd9a7830 100644
--- a/docs/integrate-into-frameworks/code-examples.mdx
+++ b/docs/integrate-into-frameworks/code-examples.mdx
@@ -184,10 +184,11 @@ import nemo_relay
 from nemo_relay import LLMRequest
 
 rewritten_args = nemo_relay.tools.request_intercepts("search", {"query": "weather"})
-rewritten_request = nemo_relay.llm.request_intercepts(
+outcome = nemo_relay.llm.request_intercepts(
     "demo-provider",
     LLMRequest({}, {"messages": []}),
 )
+rewritten_request = outcome.request
 ```
 </Tab>
 
@@ -196,7 +197,8 @@ rewritten_request = nemo_relay.llm.request_intercepts(
 import { LlmRequest, llmRequestIntercepts, toolRequestIntercepts } from 'nemo-relay-node';
 
 const rewrittenArgs = await toolRequestIntercepts('search', { query: 'weather' });
-const rewrittenRequest = await llmRequestIntercepts('demo-provider', new LlmRequest({}, { messages: [] }));
+const outcome = await llmRequestIntercepts('demo-provider', new LlmRequest({}, { messages: [] }));
+const rewrittenRequest = outcome.request;
 ```
 </Tab>
 
@@ -208,7 +210,8 @@ use serde_json::json;
 
 let rewritten_args = tool_request_intercepts("search", json!({"query": "weather"}))?;
 let request = LlmRequest { headers: Default::default(), content: json!({"messages": []}) };
-let rewritten_request = llm_request_intercepts("demo-provider", request)?;
+let outcome = llm_request_intercepts("demo-provider", request)?;
+let rewritten_request = outcome.request;
 ```
 </Tab>
 
diff --git a/docs/integrate-into-frameworks/provider-codecs.mdx b/docs/integrate-into-frameworks/provider-codecs.mdx
index f0d632661..90df6c7f6 100644
--- a/docs/integrate-into-frameworks/provider-codecs.mdx
+++ b/docs/integrate-into-frameworks/provider-codecs.mdx
@@ -41,10 +41,17 @@ When a managed LLM call has a request codec:
 
 1. NeMo Relay calls `decode` before LLM request intercepts run.
 2. Request intercepts receive both the raw request and the annotated request.
-3. Intercepts may edit the raw request, the annotated request, or both.
+3. Intercepts edit provider-body fields through the annotated request and may
+   edit transport headers through the raw request. Raw `request.content` is
+   read-only while the codec is active.
 4. NeMo Relay calls `encode` to merge the annotated request back into the original raw request.
 5. Execution intercepts and the provider callback receive the encoded provider request.
 
+If an intercept changes raw `request.content` or returns no annotation while a
+request codec is active, Relay rejects the outcome before creating the LLM
+lifecycle. When no request codec is active, the raw request remains fully
+writable and is the provider-visible source of truth.
+
 When a managed LLM call has a response codec, NeMo Relay decodes the raw provider response for observability and attaches the result to the emitted LLM end event. The response codec does not rewrite the value returned to the application. Use [Provider Response Codecs](/integrate-into-frameworks/provider-response-codecs) for response-only behavior and custom response codec examples.
 
 Codec implementations should preserve fields they do not understand. Treat `encode` as a merge operation over the original provider payload, not as a full replacement.
@@ -87,7 +94,7 @@ from nemo_relay.codecs import OpenAIChatCodec
 
 def add_system_message(_name, request, annotated):
     if annotated is None:
-        return request, annotated
+        return nemo_relay.LLMRequestInterceptOutcome(request)
 
     # Attributes of the annotated request can be re-assigned, but cannot be modified in-place.
     # For example `annotated.messages.append(...)` would not work, but re-assigning
@@ -96,7 +103,7 @@ def add_system_message(_name, request, annotated):
         {"role": "system", "content": "Answer with concise technical detail."},
         *annotated.messages,
     ]
-    return request, annotated
+    return nemo_relay.LLMRequestInterceptOutcome(request, annotated)
 
 nemo_relay.intercepts.register_llm_request(
     "framework.add_system_message",
diff --git a/docs/reference/llm-request-intercept-outcomes.mdx b/docs/reference/llm-request-intercept-outcomes.mdx
new file mode 100644
index 000000000..b184a35ec
--- /dev/null
+++ b/docs/reference/llm-request-intercept-outcomes.mdx
@@ -0,0 +1,97 @@
+---
+title: "LLM Request Intercept Outcomes"
+description: "Canonical request-intercept result and managed lifecycle behavior."
+---
+{/* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-License-Identifier: Apache-2.0 */}
+
+Every LLM request intercept returns one canonical outcome:
+
+```json
+{
+  "request": {"headers": {}, "content": {}},
+  "annotated_request": null,
+  "pending_marks": []
+}
+```
+
+`request` is required. `annotated_request` defaults to `null` when omitted on
+input, and `pending_marks` defaults to an empty list. Canonical serialization
+includes all three fields. A pending mark contains only `name`, optional
+`category` and `category_profile`, and optional `data` and `metadata`. Relay
+owns event UUIDs, parent UUIDs, and timestamps.
+
+## Request Authority
+
+The provider-body source of truth depends only on whether a request codec is
+active:
+
+| Request codec | Provider body source | Header source |
+| --- | --- | --- |
+| No codec | `outcome.request.content` | `outcome.request.headers` |
+| Active codec | `outcome.annotated_request` | `outcome.request.headers` |
+
+With an active codec, `request.content` is read-only context. Every intercept
+must return an annotation and make provider-body changes through that
+annotation, including its flattened `extra` fields for provider-specific data.
+Relay rejects a changed raw body or missing annotation at the offending
+intercept before invoking later middleware or creating an LLM lifecycle.
+
+```mermaid
+flowchart TD
+    INPUT["Original LlmRequest"] --> CODEC{"Request codec active?"}
+
+    CODEC -->|No| RAWCHAIN["Run intercept chain"]
+    RAWCHAIN --> RAWPROVIDER["Provider receives outcome.request"]
+
+    CODEC -->|Yes| DECODE["Decode content into annotated_request"]
+    DECODE --> INTERCEPT["Invoke next intercept"]
+    INTERCEPT --> CHECKANN{"Annotation returned?"}
+    CHECKANN -->|No| FAIL["Fail before lifecycle"]
+    CHECKANN -->|Yes| CHECKRAW{"request.content unchanged?"}
+    CHECKRAW -->|No| FAIL
+    CHECKRAW -->|Yes| MORE{"More intercepts?"}
+    MORE -->|Yes| INTERCEPT
+    MORE -->|No| ENCODE["Encode final annotated_request"]
+    ENCODE --> HEADERS["Apply final request.headers"]
+    HEADERS --> PROVIDER["Provider receives one resolved LlmRequest"]
+```
+
+Python callbacks return `LLMRequestInterceptOutcome`; Rust callbacks return
+`LlmRequestInterceptOutcome`; Go callbacks return
+`LLMRequestInterceptOutcome`; and Node.js and WebAssembly callbacks return
+`{ request, annotated?, pendingMarks? }`, with `categoryProfile` on each
+JavaScript pending-mark DTO. The canonical JSON forms retain `pending_marks`
+and `category_profile`. Public C callbacks write one owned canonical outcome
+JSON string. Native ABI v1 uses one host-owned outcome JSON string. Rust and
+Python `grpc-v1` worker SDKs return their canonical outcome type in a
+`JsonEnvelope` whose schema is
+`nemo.relay.LlmRequestInterceptOutcome@1`.
+
+The standalone request-intercept helper returns the complete outcome but does
+not emit its pending marks because it does not own an LLM lifecycle.
+
+## Managed Lifecycle
+
+Managed execution runs all effective global and scope-local intercepts before
+creating the LLM handle. Each accepted request/annotation pair feeds the next
+intercept under the authority rules above, while pending marks append in
+middleware order. A breaking
+intercept's marks are retained. If any intercept fails or its boundary result
+is malformed, Relay discards all accumulated marks and creates no LLM
+lifecycle.
+
+After successful interception, Relay creates the handle and captures one
+subscriber snapshot. It emits the LLM start at `T`, every pending mark at
+`T + 1µs` in returned order with the LLM UUID as parent, and the LLM end no
+earlier than `T + 1µs`. Streaming and non-streaming calls use the same rules.
+Pending marks are never added to the provider request, annotated request,
+codec input, sanitizer input, or start payload.
+
+## Migration
+
+This finalizes unpublished native ABI v1 and `grpc-v1` contracts. Rebuild all
+development native plugins and workers. Replace tuple results, split C/Go
+outputs, metadata envelopes, and parallel mark-aware registrations with the
+canonical outcome and the existing `register_llm_request_intercept`
+registration name.

From 93f37b10c6196c7abf2ed240d3f889da060ecf20 Mon Sep 17 00:00:00 2001
From: Bryan Bednarski <bbednarski@nvidia.com>
Date: Wed, 1 Jul 2026 22:43:07 -0600
Subject: [PATCH 2/3] docs: document tool execution intercept outcomes

Signed-off-by: Bryan Bednarski <bbednarski@nvidia.com>
---
 .../advanced-guide.mdx                        | 16 +++-
 .../llm-request-intercept-outcomes.mdx        | 55 ++++++++-----
 .../tool-execution-intercept-outcomes.mdx     | 77 +++++++++++++++++++
 3 files changed, 124 insertions(+), 24 deletions(-)
 create mode 100644 docs/reference/tool-execution-intercept-outcomes.mdx

diff --git a/docs/instrument-applications/advanced-guide.mdx b/docs/instrument-applications/advanced-guide.mdx
index 105468535..f2bed03c6 100644
--- a/docs/instrument-applications/advanced-guide.mdx
+++ b/docs/instrument-applications/advanced-guide.mdx
@@ -65,7 +65,8 @@ def require_query(tool_name, args):
 async def measure_tool(tool_name, args, next_call):
     started = time.perf_counter()
     try:
-        return await next_call(args)
+        result = await next_call(args)
+        return nemo_relay.ToolExecutionInterceptOutcome(result)
     finally:
         elapsed_ms = round((time.perf_counter() - started) * 1000, 2)
         print(f"{tool_name} completed in {elapsed_ms} ms")
@@ -98,7 +99,8 @@ registerToolConditionalExecutionGuardrail("search.require_query", 20, (_toolName
 registerToolExecutionIntercept("search.measure", 30, async (args, next) => {
   const started = performance.now();
   try {
-    return await next(args);
+    const result = await next(args);
+    return { result };
   } finally {
     console.log(`search completed in ${Math.round(performance.now() - started)} ms`);
   }
@@ -113,6 +115,7 @@ use nemo_relay::api::registry::{
     register_tool_execution_intercept,
     register_tool_sanitize_request_guardrail,
 };
+use nemo_relay::api::tool::ToolExecutionInterceptOutcome;
 use serde_json::json;
 use std::sync::Arc;
 use std::time::Instant;
@@ -145,11 +148,12 @@ register_tool_execution_intercept(
     "search.measure",
     30,
     Arc::new(|name, args, next| {
+        let name = name.to_owned();
         Box::pin(async move {
             let started = Instant::now();
-            let result = next(name.clone(), args).await;
+            let result = next(args).await;
             println!("{name} completed in {:?}", started.elapsed());
-            result
+            Ok(ToolExecutionInterceptOutcome::new(result?))
         })
     }),
 )?;
@@ -158,6 +162,10 @@ register_tool_execution_intercept(
 
 </Tabs>
 
+Tool execution intercepts return an outcome even when they only forward the
+result from `next`. See [Tool Execution Intercept Outcomes](/reference/tool-execution-intercept-outcomes)
+for pending marks, lifecycle ordering, and binding-specific return types.
+
 ## Scope Middleware to One Request
 
 Use scope-local middleware when a policy applies only to one request, tenant, experiment, or agent run.
diff --git a/docs/reference/llm-request-intercept-outcomes.mdx b/docs/reference/llm-request-intercept-outcomes.mdx
index b184a35ec..37b19cab8 100644
--- a/docs/reference/llm-request-intercept-outcomes.mdx
+++ b/docs/reference/llm-request-intercept-outcomes.mdx
@@ -1,11 +1,15 @@
 ---
 title: "LLM Request Intercept Outcomes"
-description: "Canonical request-intercept result and managed lifecycle behavior."
+description: "Canonical result returned by LLM request intercepts and its managed lifecycle behavior."
 ---
 {/* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 SPDX-License-Identifier: Apache-2.0 */}
 
-Every LLM request intercept returns one canonical outcome:
+An LLM request intercept rewrites a request before managed execution. This page
+describes the canonical outcome returned by each intercept, including how Relay
+uses it to resolve the provider request and schedule lifecycle marks.
+
+A serialized canonical outcome looks like this:
 
 ```json
 {
@@ -17,13 +21,13 @@ Every LLM request intercept returns one canonical outcome:
 
 `request` is required. `annotated_request` defaults to `null` when omitted on
 input, and `pending_marks` defaults to an empty list. Canonical serialization
-includes all three fields. A pending mark contains only `name`, optional
+includes all three fields. A pending mark only contains `name`, optional
 `category` and `category_profile`, and optional `data` and `metadata`. Relay
 owns event UUIDs, parent UUIDs, and timestamps.
 
 ## Request Authority
 
-The provider-body source of truth depends only on whether a request codec is
+The provider-body source of truth only depends on whether a request codec is
 active:
 
 | Request codec | Provider body source | Header source |
@@ -37,6 +41,9 @@ annotation, including its flattened `extra` fields for provider-specific data.
 Relay rejects a changed raw body or missing annotation at the offending
 intercept before invoking later middleware or creating an LLM lifecycle.
 
+The following diagram shows how Relay resolves an intercept outcome before
+managed execution.
+
 ```mermaid
 flowchart TD
     INPUT["Original LlmRequest"] --> CODEC{"Request codec active?"}
@@ -57,16 +64,21 @@ flowchart TD
     HEADERS --> PROVIDER["Provider receives one resolved LlmRequest"]
 ```
 
-Python callbacks return `LLMRequestInterceptOutcome`; Rust callbacks return
-`LlmRequestInterceptOutcome`; Go callbacks return
-`LLMRequestInterceptOutcome`; and Node.js and WebAssembly callbacks return
-`{ request, annotated?, pendingMarks? }`, with `categoryProfile` on each
-JavaScript pending-mark DTO. The canonical JSON forms retain `pending_marks`
-and `category_profile`. Public C callbacks write one owned canonical outcome
-JSON string. Native ABI v1 uses one host-owned outcome JSON string. Rust and
-Python `grpc-v1` worker SDKs return their canonical outcome type in a
-`JsonEnvelope` whose schema is
-`nemo.relay.LlmRequestInterceptOutcome@1`.
+## Binding Contract
+
+The following callbacks return the same logical outcome in their native type
+or object shape:
+
+- Python callbacks return `LLMRequestInterceptOutcome`.
+- Rust callbacks return `LlmRequestInterceptOutcome`.
+- Go callbacks return `LLMRequestInterceptOutcome`.
+- Node.js and WebAssembly callbacks return `{ request, annotated?, pendingMarks? }`.
+  JavaScript pending-mark DTOs use `categoryProfile`; canonical JSON retains
+  `pending_marks` and `category_profile`.
+- Public C callbacks return one owned canonical outcome JSON string, and native
+  ABI v1 callbacks return one host-owned outcome JSON string.
+- Rust and Python `grpc-v1` worker SDKs return their canonical outcome in a
+  `JsonEnvelope` with schema `nemo.relay.LlmRequestInterceptOutcome@1`.
 
 The standalone request-intercept helper returns the complete outcome but does
 not emit its pending marks because it does not own an LLM lifecycle.
@@ -74,12 +86,11 @@ not emit its pending marks because it does not own an LLM lifecycle.
 ## Managed Lifecycle
 
 Managed execution runs all effective global and scope-local intercepts before
-creating the LLM handle. Each accepted request/annotation pair feeds the next
-intercept under the authority rules above, while pending marks append in
-middleware order. A breaking
-intercept's marks are retained. If any intercept fails or its boundary result
-is malformed, Relay discards all accumulated marks and creates no LLM
-lifecycle.
+creating the LLM handle. Each accepted request and annotation pair feeds the
+next intercept under the authority rules above, while pending marks append in
+middleware order. Relay retains the marks returned by a breaking intercept. If
+any intercept fails or its boundary result is malformed, Relay discards all
+accumulated marks and creates no LLM lifecycle.
 
 After successful interception, Relay creates the handle and captures one
 subscriber snapshot. It emits the LLM start at `T`, every pending mark at
@@ -95,3 +106,7 @@ development native plugins and workers. Replace tuple results, split C/Go
 outputs, metadata envelopes, and parallel mark-aware registrations with the
 canonical outcome and the existing `register_llm_request_intercept`
 registration name.
+
+## Related Topics
+
+- [Tool Execution Intercept Outcomes](/reference/tool-execution-intercept-outcomes)
diff --git a/docs/reference/tool-execution-intercept-outcomes.mdx b/docs/reference/tool-execution-intercept-outcomes.mdx
new file mode 100644
index 000000000..483477a21
--- /dev/null
+++ b/docs/reference/tool-execution-intercept-outcomes.mdx
@@ -0,0 +1,77 @@
+---
+title: "Tool Execution Intercept Outcomes"
+description: "Canonical result returned by tool execution intercepts and its managed lifecycle behavior."
+---
+{/* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-License-Identifier: Apache-2.0 */}
+
+A tool execution intercept wraps or short-circuits a managed tool callback.
+Every intercept returns one canonical outcome so Relay can keep lifecycle
+control data separate from the application-visible tool result.
+
+```json
+{
+  "result": {},
+  "pending_marks": []
+}
+```
+
+`result` is required and is the value that reaches the remaining middleware and
+the application. `pending_marks` defaults to an empty list. Each pending mark
+contains `name`, optional `category` and `category_profile`, and optional
+`data` and `metadata`. Relay assigns event UUIDs, parent UUIDs, and timestamps.
+
+## Continuation Semantics
+
+The default tool callback and an intercept's `next(args)` continuation both
+remain raw JSON APIs. `next(args)` returns only the downstream tool result; it
+does not expose downstream pending marks. Relay retains those marks, combines
+them in effective middleware order, and materializes them only after the final
+outcome succeeds.
+
+There is no mark-specific registration path. Use the existing global,
+scope-local, or plugin-context tool execution registration APIs and return the
+canonical outcome from every registered callback. Legacy raw intercept returns
+are rejected at public and dynamic-plugin boundaries.
+
+## Managed Lifecycle
+
+On successful managed execution, Relay emits the tool end event before any
+pending marks. The end timestamp is no earlier than one microsecond after the
+tool start timestamp. Relay emits pending marks in their resolved middleware
+order, assigns each mark the managed tool UUID as its parent, and gives each
+mark a timestamp after the tool end event.
+
+If execution or an intercept fails, Relay emits the error end event and
+discards accumulated pending marks. Pending marks are never included in the
+application-visible tool result or passed to sanitize-response guardrails.
+
+## Binding Contract
+
+The following callbacks return the same logical outcome in their native type
+or object shape:
+
+- Python callbacks return `ToolExecutionInterceptOutcome`.
+- Rust callbacks and native plugins return `ToolExecutionInterceptOutcome`.
+- Go callbacks return `ToolExecutionInterceptOutcome`.
+- Node.js callbacks return `{ result, pendingMarks? }`, where JavaScript
+  pending-mark DTOs use `categoryProfile`.
+- Public C callbacks return canonical JSON with `result` and optional
+  `pending_marks`.
+- `grpc-v1` worker SDKs return a `ToolExecutionInterceptOutcome` in a
+  `JsonEnvelope` with schema `nemo.relay.ToolExecutionInterceptOutcome@1`.
+
+Canonical JSON uses `pending_marks` and `category_profile` across bindings.
+
+## Migration
+
+This finalizes the unpublished tool execution intercept contract. Update every
+registered tool execution intercept to return the canonical outcome, while
+leaving the default tool callback and `next(args)` continuation as raw JSON.
+Rebuild development native plugins and workers against the current Relay main
+branch.
+
+## Related Topics
+
+- [LLM Request Intercept Outcomes](/reference/llm-request-intercept-outcomes)
+- [Add Middleware](/instrument-applications/advanced-guide)

From 24ef58fd038ac9caf0ed3aeefe66e3016808b8ce Mon Sep 17 00:00:00 2001
From: Bryan Bednarski <bbednarski@nvidia.com>
Date: Thu, 2 Jul 2026 11:40:55 -0600
Subject: [PATCH 3/3] docs: clarify tool outcome reference

Signed-off-by: Bryan Bednarski <bbednarski@nvidia.com>
---
 docs/instrument-applications/advanced-guide.mdx   | 2 +-
 docs/reference/llm-request-intercept-outcomes.mdx | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/docs/instrument-applications/advanced-guide.mdx b/docs/instrument-applications/advanced-guide.mdx
index f2bed03c6..8bf0517ca 100644
--- a/docs/instrument-applications/advanced-guide.mdx
+++ b/docs/instrument-applications/advanced-guide.mdx
@@ -163,7 +163,7 @@ register_tool_execution_intercept(
 </Tabs>
 
 Tool execution intercepts return an outcome even when they only forward the
-result from `next`. See [Tool Execution Intercept Outcomes](/reference/tool-execution-intercept-outcomes)
+result from `next`. Refer to [Tool Execution Intercept Outcomes](/reference/tool-execution-intercept-outcomes)
 for pending marks, lifecycle ordering, and binding-specific return types.
 
 ## Scope Middleware to One Request
diff --git a/docs/reference/llm-request-intercept-outcomes.mdx b/docs/reference/llm-request-intercept-outcomes.mdx
index 37b19cab8..57ffa0300 100644
--- a/docs/reference/llm-request-intercept-outcomes.mdx
+++ b/docs/reference/llm-request-intercept-outcomes.mdx
@@ -30,6 +30,12 @@ owns event UUIDs, parent UUIDs, and timestamps.
 The provider-body source of truth only depends on whether a request codec is
 active:
 
+Request codecs translate provider-specific request payloads into Relay's normalized
+annotated request for intercepts, then encode accepted annotated edits back into the
+provider request before execution. They normalize the payload shape rather than
+translating between providers; response codecs are a separate response-side path used to
+attach normalized data to lifecycle events. The following table lists each source of truth:
+
 | Request codec | Provider body source | Header source |
 | --- | --- | --- |
 | No codec | `outcome.request.content` | `outcome.request.headers` |