diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 0000000..5c36863 --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,232 @@ +name: E2E Integration + +# Validates that the plugin works end-to-end against the latest published +# OpenClaw release. Installs OpenClaw fresh, builds and installs the plugin +# from source, starts the gateway with a mock LLM + mock Opik server, runs +# a real agent turn, and asserts that traces and spans were exported. +# +# This catches regressions caused by OpenClaw plugin lifecycle changes that +# unit tests cannot detect. + +permissions: + contents: read + +on: + push: + branches: [main] + paths: + - ".github/workflows/e2e.yml" + - "index.ts" + - "src/**" + - "scripts/**" + - "package.json" + pull_request: + branches: [main] + paths: + - ".github/workflows/e2e.yml" + - "index.ts" + - "src/**" + - "scripts/**" + - "package.json" + schedule: + - cron: '0 6 * * *' # daily at 6am UTC — catches new OpenClaw releases breaking the plugin + workflow_dispatch: + +jobs: + e2e: + name: E2E against OpenClaw ${{ matrix.openclaw-version }} + runs-on: ubuntu-latest + timeout-minutes: 15 + + strategy: + fail-fast: false + matrix: + openclaw-version: + - latest + + env: + E2E_RESULT_FILE: e2e-result.json + E2E_LLM_RESULT_FILE: e2e-llm-result.json + OPENCLAW_GATEWAY_TOKEN: e2e-test-token + + steps: + - name: Checkout plugin source + uses: actions/checkout@v5 + + - name: Setup Node.js + uses: actions/setup-node@v5 + with: + node-version: "22.x" + cache: "npm" + + - name: Setup npm + run: npm install -g npm@11.6.2 + + - name: Setup npm global prefix for caching + run: | + npm config set prefix ~/.npm-global + echo "$HOME/.npm-global/bin" >> $GITHUB_PATH + + - name: Cache OpenClaw install + uses: actions/cache@v4 + with: + path: ~/.npm-global + key: openclaw-${{ matrix.openclaw-version }}-${{ runner.os }}-${{ runner.arch }} + + - name: Install OpenClaw (${{ matrix.openclaw-version }}) + run: npm install -g 
openclaw@${{ matrix.openclaw-version }} + + - name: Print OpenClaw version + run: openclaw --version + + - name: Install plugin dependencies + run: npm ci + + - name: Build plugin tarball + run: npm pack + + - name: Write OpenClaw config + run: | + mkdir -p ~/.openclaw/agents/main/sessions + cat > ~/.openclaw/openclaw.json << 'EOF' + { + "gateway": { + "mode": "local", + "bind": "loopback", + "auth": { "mode": "token", "token": "e2e-test-token" }, + "port": 18789 + }, + "agents": { + "defaults": { + "model": { + "primary": "mock-openai/gpt-4o-mini" + } + } + }, + "models": { + "mode": "merge", + "providers": { + "mock-openai": { + "baseUrl": "http://127.0.0.1:18790/v1", + "apiKey": "mock-key", + "authHeader": true, + "api": "openai-responses", + "models": [ + { + "id": "gpt-4o-mini", + "name": "Mock GPT-4o Mini", + "reasoning": false, + "input": ["text"], + "cost": { + "input": 0, + "output": 0, + "cacheRead": 0, + "cacheWrite": 0 + }, + "contextWindow": 128000, + "maxTokens": 16384 + } + ] + } + } + }, + "plugins": { + "allow": ["opik-openclaw"], + "entries": { + "opik-openclaw": { + "enabled": true, + "config": { + "enabled": true, + "apiUrl": "http://127.0.0.1:18791", + "apiKey": "mock-key", + "projectName": "e2e-test", + "workspaceName": "default" + } + } + } + } + } + EOF + + - name: Install plugin from tarball + run: openclaw plugins install ./opik-opik-openclaw-*.tgz + + - name: Start mock Opik server + run: | + node scripts/mock-opik-server.mjs > mock-opik.log 2>&1 & + echo $! > mock-opik.pid + env: + MOCK_OPIK_PORT: "18791" + + - name: Start mock LLM server + run: | + node scripts/mock-llm-server.mjs > mock-llm.log 2>&1 & + echo $! 
> mock-llm.pid
+        env:
+          MOCK_LLM_PORT: "18790"
+
+      - name: Wait for mock servers to be ready
+        run: |
+          for i in $(seq 1 10); do
+            curl -sf http://127.0.0.1:18791/health > /dev/null 2>&1 && \
+              curl -sf http://127.0.0.1:18790/v1/models > /dev/null 2>&1 && break
+            sleep 1
+          done
+          # Fail the step loudly if the mocks never came up (mirrors the final
+          # `openclaw health` check in the gateway readiness step below).
+          curl -sf http://127.0.0.1:18791/health > /dev/null
+          curl -sf http://127.0.0.1:18790/v1/models > /dev/null
+
+      - name: Start OpenClaw gateway
+        run: |
+          openclaw gateway run > gateway.log 2>&1 &
+          echo $! > gateway.pid
+
+      - name: Wait for gateway to be ready
+        run: |
+          for i in $(seq 1 15); do
+            openclaw health > /dev/null 2>&1 && break
+            sleep 1
+          done
+          openclaw health
+
+      - name: Run agent turn
+        run: |
+          set -o pipefail
+          openclaw agent --agent main --message "ping" --deliver 2>&1 | tee agent-output.log
+          if grep -q "falling back to embedded" agent-output.log; then
+            echo "[e2e] FAIL: gateway turn fell back to embedded mode"
+            exit 1
+          fi
+        timeout-minutes: 2
+
+      - name: Stop gateway and flush traces
+        run: |
+          openclaw gateway stop || true
+          if [ -f gateway.pid ]; then
+            kill "$(cat gateway.pid)" > /dev/null 2>&1 || true
+          fi
+          sleep 2
+
+      - name: Stop mock servers and collect results
+        run: |
+          if [ -f mock-opik.pid ]; then
+            kill "$(cat mock-opik.pid)" > /dev/null 2>&1 || true
+          fi
+          if [ -f mock-llm.pid ]; then
+            kill "$(cat mock-llm.pid)" > /dev/null 2>&1 || true
+          fi
+          sleep 1
+
+      - name: Assert E2E results
+        run: node scripts/check-e2e-result.mjs
+
+      - name: Upload E2E result on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: e2e-debug
+          path: |
+            e2e-result.json
+            e2e-llm-result.json
+            agent-output.log
+            gateway.log
+            mock-opik.log
+            mock-llm.log
+          if-no-files-found: ignore
diff --git a/scripts/check-e2e-result.mjs b/scripts/check-e2e-result.mjs
new file mode 100644
index 0000000..aa844eb
--- /dev/null
+++ b/scripts/check-e2e-result.mjs
@@ -0,0 +1,71 @@
+#!/usr/bin/env node
+/**
+ * Reads the E2E result file written by mock-opik-server.mjs and exits non-zero
+ * if the minimum expected trace/span counts were not met.
+ */ + +import fs from "node:fs"; + +const RESULT_FILE = process.env.E2E_RESULT_FILE ?? "e2e-result.json"; +const LLM_RESULT_FILE = process.env.E2E_LLM_RESULT_FILE ?? "e2e-llm-result.json"; + +if (!fs.existsSync(RESULT_FILE)) { + console.error(`[check-e2e] FAIL: result file not found: ${RESULT_FILE}`); + console.error(" The mock Opik server may not have written its result (SIGTERM not received?)."); + process.exit(1); +} + +const result = JSON.parse(fs.readFileSync(RESULT_FILE, "utf8")); +console.log("[check-e2e] result:", result); + +if (!fs.existsSync(LLM_RESULT_FILE)) { + console.error(`[check-e2e] FAIL: LLM result file not found: ${LLM_RESULT_FILE}`); + console.error(" The mock LLM server may not have written its result (SIGTERM not received?)."); + process.exit(1); +} + +const llmResult = JSON.parse(fs.readFileSync(LLM_RESULT_FILE, "utf8")); +console.log("[check-e2e] llm result:", llmResult); + +const failures = []; +const llmGenerationRequests = (llmResult.responses ?? 0) + (llmResult.chatCompletions ?? 0); +const traceFinalizations = (result.tracePatches ?? 0) + (result.endedTraces ?? 0); +const spanFinalizations = (result.spanPatches ?? 0) + (result.endedSpans ?? 0); + +if (result.traces < 1) { + failures.push(`Expected ≥1 trace batch, got ${result.traces}`); +} + +if (result.spans < 1) { + failures.push(`Expected ≥1 span batch, got ${result.spans}`); +} + +if (traceFinalizations < 1) { + failures.push( + `Expected ≥1 finalized trace (patch or batch endTime), got patches=${result.tracePatches ?? 0} ended=${result.endedTraces ?? 0}`, + ); +} + +if (spanFinalizations < 1) { + failures.push( + `Expected ≥1 finalized span (patch or batch endTime), got patches=${result.spanPatches ?? 0} ended=${result.endedSpans ?? 
0}`, + ); +} + +if (result.totalRequests < 1) { + failures.push("No requests at all reached the mock Opik server — plugin hooks may not have fired"); +} + +if (llmGenerationRequests < 1) { + failures.push( + `Expected ≥1 mock LLM generation request, got ${llmGenerationRequests}`, + ); +} + +if (failures.length > 0) { + console.error("[check-e2e] FAIL:"); + for (const f of failures) console.error(" •", f); + process.exit(1); +} + +console.log("[check-e2e] PASS — traces, spans, patches, and mock LLM traffic were observed"); diff --git a/scripts/mock-llm-server.mjs b/scripts/mock-llm-server.mjs new file mode 100644 index 0000000..415a642 --- /dev/null +++ b/scripts/mock-llm-server.mjs @@ -0,0 +1,190 @@ +#!/usr/bin/env node +/** + * Minimal OpenAI-compatible mock LLM server for E2E tests. + * + * Returns a canned chat completion response so the OpenClaw gateway can + * complete a full agent turn (llm_input → llm_output → agent_end) without + * a real model API key. + * + * Supports both streaming (SSE) and non-streaming responses because OpenClaw + * may request either depending on config. + */ + +import http from "node:http"; +import fs from "node:fs"; + +const PORT = parseInt(process.env.MOCK_LLM_PORT ?? "18790", 10); +const MODEL = "gpt-4o-mini"; +const RESULT_FILE = process.env.E2E_LLM_RESULT_FILE ?? 
"e2e-llm-result.json"; +const RESPONSE_TEXT = "pong"; + +const received = { + models: 0, + responses: 0, + streamingResponses: 0, + chatCompletions: 0, + streamingChatCompletions: 0, +}; + +function nonStreamingResponse(model) { + return JSON.stringify({ + id: "chatcmpl-e2e-mock", + object: "chat.completion", + created: Math.floor(Date.now() / 1000), + model, + choices: [ + { + index: 0, + message: { role: "assistant", content: RESPONSE_TEXT }, + finish_reason: "stop", + }, + ], + usage: { prompt_tokens: 5, completion_tokens: 2, total_tokens: 7 }, + }); +} + +function streamingChunks(model) { + const id = "chatcmpl-e2e-mock"; + const created = Math.floor(Date.now() / 1000); + + const chunks = [ + { id, object: "chat.completion.chunk", created, model, choices: [{ index: 0, delta: { role: "assistant", content: "" }, finish_reason: null }] }, + { id, object: "chat.completion.chunk", created, model, choices: [{ index: 0, delta: { content: RESPONSE_TEXT }, finish_reason: null }] }, + { id, object: "chat.completion.chunk", created, model, choices: [{ index: 0, delta: {}, finish_reason: "stop" }] }, + ]; + + return chunks.map((c) => `data: ${JSON.stringify(c)}\n\n`).join("") + "data: [DONE]\n\n"; +} + +function responseObject(model) { + return { + id: "resp-e2e-mock", + object: "response", + created_at: Math.floor(Date.now() / 1000), + model, + status: "completed", + output: [ + { + id: "msg-e2e-mock", + type: "message", + role: "assistant", + content: [ + { + type: "output_text", + text: RESPONSE_TEXT, + annotations: [], + }, + ], + }, + ], + output_text: RESPONSE_TEXT, + usage: { input_tokens: 5, output_tokens: 2, total_tokens: 7 }, + }; +} + +function streamingResponseEvents(model) { + const response = responseObject(model); + const message = response.output[0]; + const part = message.content[0]; + + const events = [ + ["response.created", { type: "response.created", response: { ...response, status: "in_progress", output: [] } }], + ["response.in_progress", { type: 
"response.in_progress", response: { ...response, status: "in_progress", output: [] } }], + ["response.output_item.added", { type: "response.output_item.added", output_index: 0, item: { ...message, content: [] } }], + ["response.content_part.added", { type: "response.content_part.added", output_index: 0, item_id: message.id, content_index: 0, part: { type: "output_text", text: "" } }], + ["response.output_text.delta", { type: "response.output_text.delta", output_index: 0, item_id: message.id, content_index: 0, delta: RESPONSE_TEXT }], + ["response.output_text.done", { type: "response.output_text.done", output_index: 0, item_id: message.id, content_index: 0, text: RESPONSE_TEXT }], + ["response.content_part.done", { type: "response.content_part.done", output_index: 0, item_id: message.id, content_index: 0, part }], + ["response.output_item.done", { type: "response.output_item.done", output_index: 0, item: message }], + ["response.completed", { type: "response.completed", response }], + ]; + + return events + .map(([event, data]) => `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`) + .join("") + "data: [DONE]\n\n"; +} + +const server = http.createServer((req, res) => { + let raw = ""; + req.on("data", (chunk) => (raw += chunk)); + req.on("end", () => { + console.error(`[mock-llm] ${req.method} ${req.url}`); + + if (req.url === "/v1/models" && req.method === "GET") { + received.models += 1; + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ object: "list", data: [{ id: MODEL, object: "model" }] })); + return; + } + + if (req.url === "/v1/chat/completions" && req.method === "POST") { + let body = {}; + try { body = JSON.parse(raw); } catch { /* ignore */ } + + const wantsStream = body.stream === true; + received.chatCompletions += 1; + if (wantsStream) { + received.streamingChatCompletions += 1; + } + + if (wantsStream) { + res.writeHead(200, { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + Connection: 
"keep-alive", + }); + res.end(streamingChunks(body.model ?? MODEL)); + } else { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(nonStreamingResponse(body.model ?? MODEL)); + } + return; + } + + if (req.url === "/v1/responses" && req.method === "POST") { + let body = {}; + try { body = JSON.parse(raw); } catch { /* ignore */ } + + const wantsStream = body.stream === true; + received.responses += 1; + if (wantsStream) { + received.streamingResponses += 1; + } + + if (wantsStream) { + res.writeHead(200, { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + Connection: "keep-alive", + }); + res.end(streamingResponseEvents(body.model ?? MODEL)); + } else { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(responseObject(body.model ?? MODEL))); + } + return; + } + + res.writeHead(404, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: { message: "not found", type: "invalid_request_error" } })); + }); +}); + +server.listen(PORT, "127.0.0.1", () => { + console.error(`[mock-llm] listening on http://127.0.0.1:${PORT}`); +}); + +function writeResult() { + fs.writeFileSync(RESULT_FILE, JSON.stringify(received, null, 2)); + console.error(`[mock-llm] result written to ${RESULT_FILE}:`, received); +} + +process.on("SIGTERM", () => { + writeResult(); + server.close(() => process.exit(0)); +}); + +process.on("SIGINT", () => { + writeResult(); + server.close(() => process.exit(0)); +}); diff --git a/scripts/mock-opik-server.mjs b/scripts/mock-opik-server.mjs new file mode 100644 index 0000000..bc2833f --- /dev/null +++ b/scripts/mock-opik-server.mjs @@ -0,0 +1,107 @@ +#!/usr/bin/env node +/** + * Mock Opik API server for E2E tests. + * + * Accepts the Opik trace/span batch and patch endpoints and records every + * payload it receives. On SIGTERM (or when the gateway flushes and stops), + * it writes a summary to E2E_RESULT_FILE (default: e2e-result.json) and exits. 
+ *
+ * The check-e2e-result.mjs script reads that file and fails the test if
+ * no traces or spans were received.
+ */
+
+import http from "node:http";
+import fs from "node:fs";
+
+const PORT = parseInt(process.env.MOCK_OPIK_PORT ?? "18791", 10);
+const RESULT_FILE = process.env.E2E_RESULT_FILE ?? "e2e-result.json";
+
+const received = {
+  traces: 0,
+  spans: 0,
+  endedTraces: 0,
+  endedSpans: 0,
+  tracePatches: 0,
+  spanPatches: 0,
+  requests: [],
+};
+
+function record(method, url, body) {
+  console.error(`[mock-opik] ${method} ${url}`);
+  received.requests.push({ method, url, bodyLength: JSON.stringify(body).length });
+
+  if (method === "POST" && url.includes("/traces/batch")) {
+    const traces = body?.traces ?? [];
+    received.traces += traces.length;
+    received.endedTraces += traces.filter((trace) => trace?.endTime !== undefined || trace?.end_time !== undefined).length;
+  } else if (method === "POST" && url.includes("/spans/batch")) {
+    const spans = body?.spans ?? [];
+    received.spans += spans.length;
+    received.endedSpans += spans.filter((span) => span?.endTime !== undefined || span?.end_time !== undefined).length;
+  } else if (method === "PATCH" && url.match(/\/traces\/[^/]+$/)) {
+    received.tracePatches += 1;
+  } else if (method === "PATCH" && url.match(/\/spans\/[^/]+$/)) {
+    received.spanPatches += 1;
+  }
+}
+
+const server = http.createServer((req, res) => {
+  let raw = "";
+  req.on("data", (chunk) => (raw += chunk));
+  req.on("end", () => {
+    let body = {};
+    try {
+      body = JSON.parse(raw || "{}");
+    } catch {
+      // ignore parse errors for non-JSON bodies
+    }
+
+    record(req.method, req.url, body);
+
+    // Respond 200/204 to everything so the plugin doesn't retry.
+    const status =
+      req.method === "GET" ? 200 : req.method === "DELETE" ? 204 : 200;
+
+    res.writeHead(status, { "Content-Type": "application/json" });
+
+    // Return minimal responses for the endpoints Opik SDK reads back.
+ if (req.url?.includes("/projects") && req.method === "GET") { + res.end(JSON.stringify({ content: [{ id: "mock-project-id", name: "e2e-test" }] })); + } else if (req.url?.includes("/traces/batch") && req.method === "POST") { + res.end(JSON.stringify({})); + } else if (req.url?.includes("/spans/batch") && req.method === "POST") { + res.end(JSON.stringify({})); + } else { + res.end(JSON.stringify({})); + } + }); +}); + +server.listen(PORT, "127.0.0.1", () => { + console.error(`[mock-opik] listening on http://127.0.0.1:${PORT}`); +}); + +function writeResult() { + const summary = { + traces: received.traces, + spans: received.spans, + endedTraces: received.endedTraces, + endedSpans: received.endedSpans, + tracePatches: received.tracePatches, + spanPatches: received.spanPatches, + totalRequests: received.requests.length, + }; + fs.writeFileSync(RESULT_FILE, JSON.stringify(summary, null, 2)); + console.error(`[mock-opik] result written to ${RESULT_FILE}:`, summary); +} + +process.on("SIGTERM", () => { + writeResult(); + server.close(() => process.exit(0)); +}); + +process.on("SIGINT", () => { + writeResult(); + server.close(() => process.exit(0)); +});