Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
232 changes: 232 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
name: E2E Integration

# Validates that the plugin works end-to-end against the latest published
# OpenClaw release. Installs OpenClaw fresh, builds and installs the plugin
# from source, starts the gateway with a mock LLM + mock Opik server, runs
# a real agent turn, and asserts that traces and spans were exported.
#
# This catches regressions caused by OpenClaw plugin lifecycle changes that
# unit tests cannot detect.

permissions:
  contents: read

on:
  push:
    branches: [main]
    paths:
      - ".github/workflows/e2e.yml"
      - "index.ts"
      - "src/**"
      - "scripts/**"
      - "package.json"
  pull_request:
    branches: [main]
    paths:
      - ".github/workflows/e2e.yml"
      - "index.ts"
      - "src/**"
      - "scripts/**"
      - "package.json"
  schedule:
    - cron: '0 6 * * *' # daily at 6am UTC — catches new OpenClaw releases breaking the plugin
  workflow_dispatch:

jobs:
  e2e:
    name: E2E against OpenClaw ${{ matrix.openclaw-version }}
    runs-on: ubuntu-latest
    timeout-minutes: 15

    strategy:
      fail-fast: false
      matrix:
        openclaw-version:
          - latest

    env:
      # Result files are written by the mock servers on shutdown and read by
      # scripts/check-e2e-result.mjs in the assertion step.
      E2E_RESULT_FILE: e2e-result.json
      E2E_LLM_RESULT_FILE: e2e-llm-result.json
      # Must match gateway.auth.token in the config written below.
      OPENCLAW_GATEWAY_TOKEN: e2e-test-token

    steps:
      - name: Checkout plugin source
        uses: actions/checkout@v5

      - name: Setup Node.js
        uses: actions/setup-node@v5
        with:
          node-version: "22.x"
          cache: "npm"

      - name: Setup npm
        run: npm install -g npm@11.6.2

      # Global installs go to a user-writable prefix so the cache step below
      # can restore the OpenClaw install without root.
      - name: Setup npm global prefix for caching
        run: |
          npm config set prefix ~/.npm-global
          echo "$HOME/.npm-global/bin" >> $GITHUB_PATH

      - name: Cache OpenClaw install
        uses: actions/cache@v4
        with:
          path: ~/.npm-global
          key: openclaw-${{ matrix.openclaw-version }}-${{ runner.os }}-${{ runner.arch }}

      - name: Install OpenClaw (${{ matrix.openclaw-version }})
        run: npm install -g openclaw@${{ matrix.openclaw-version }}

      - name: Print OpenClaw version
        run: openclaw --version

      - name: Install plugin dependencies
        run: npm ci

      - name: Build plugin tarball
        run: npm pack

      # Config points the agent at the mock LLM (port 18790) and the plugin at
      # the mock Opik server (port 18791). The heredoc is quoted so nothing is
      # shell-expanded.
      - name: Write OpenClaw config
        run: |
          mkdir -p ~/.openclaw/agents/main/sessions
          cat > ~/.openclaw/openclaw.json << 'EOF'
          {
            "gateway": {
              "mode": "local",
              "bind": "loopback",
              "auth": { "mode": "token", "token": "e2e-test-token" },
              "port": 18789
            },
            "agents": {
              "defaults": {
                "model": {
                  "primary": "mock-openai/gpt-4o-mini"
                }
              }
            },
            "models": {
              "mode": "merge",
              "providers": {
                "mock-openai": {
                  "baseUrl": "http://127.0.0.1:18790/v1",
                  "apiKey": "mock-key",
                  "authHeader": true,
                  "api": "openai-responses",
                  "models": [
                    {
                      "id": "gpt-4o-mini",
                      "name": "Mock GPT-4o Mini",
                      "reasoning": false,
                      "input": ["text"],
                      "cost": {
                        "input": 0,
                        "output": 0,
                        "cacheRead": 0,
                        "cacheWrite": 0
                      },
                      "contextWindow": 128000,
                      "maxTokens": 16384
                    }
                  ]
                }
              }
            },
            "plugins": {
              "allow": ["opik-openclaw"],
              "entries": {
                "opik-openclaw": {
                  "enabled": true,
                  "config": {
                    "enabled": true,
                    "apiUrl": "http://127.0.0.1:18791",
                    "apiKey": "mock-key",
                    "projectName": "e2e-test",
                    "workspaceName": "default"
                  }
                }
              }
            }
          }
          EOF

      - name: Install plugin from tarball
        run: openclaw plugins install ./opik-opik-openclaw-*.tgz

      - name: Start mock Opik server
        run: |
          node scripts/mock-opik-server.mjs > mock-opik.log 2>&1 &
          echo $! > mock-opik.pid
        env:
          MOCK_OPIK_PORT: "18791"

      - name: Start mock LLM server
        run: |
          node scripts/mock-llm-server.mjs > mock-llm.log 2>&1 &
          echo $! > mock-llm.pid
        env:
          MOCK_LLM_PORT: "18790"

      # Poll both mock servers, then hard-fail if either is still down —
      # otherwise a dead mock server only surfaces as a confusing agent-turn
      # failure later. (Mirrors the final `openclaw health` in the gateway
      # wait step below.)
      - name: Wait for mock servers to be ready
        run: |
          for i in $(seq 1 10); do
            curl -sf http://127.0.0.1:18791/health > /dev/null 2>&1 && \
            curl -sf http://127.0.0.1:18790/v1/models > /dev/null 2>&1 && break
            sleep 1
          done
          curl -sf http://127.0.0.1:18791/health > /dev/null
          curl -sf http://127.0.0.1:18790/v1/models > /dev/null

      - name: Start OpenClaw gateway
        run: |
          openclaw gateway run > gateway.log 2>&1 &
          echo $! > gateway.pid

      - name: Wait for gateway to be ready
        run: |
          for i in $(seq 1 15); do
            openclaw health > /dev/null 2>&1 && break
            sleep 1
          done
          openclaw health

      # A fallback to embedded mode means the gateway turn silently degraded;
      # treat it as a hard failure even though the command itself succeeded.
      - name: Run agent turn
        run: |
          set -o pipefail
          openclaw agent --agent main --message "ping" --deliver 2>&1 | tee agent-output.log
          if grep -q "falling back to embedded" agent-output.log; then
            echo "[e2e] FAIL: gateway turn fell back to embedded mode"
            exit 1
          fi
        timeout-minutes: 2

      # Graceful stop first so the plugin can flush pending traces; the PID
      # kill is a fallback if the CLI stop fails.
      - name: Stop gateway and flush traces
        run: |
          openclaw gateway stop || true
          if [ -f gateway.pid ]; then
            kill "$(cat gateway.pid)" > /dev/null 2>&1 || true
          fi
          sleep 2

      # SIGTERM prompts the mock servers to write their result files.
      - name: Stop mock servers and collect results
        run: |
          if [ -f mock-opik.pid ]; then
            kill "$(cat mock-opik.pid)" > /dev/null 2>&1 || true
          fi
          if [ -f mock-llm.pid ]; then
            kill "$(cat mock-llm.pid)" > /dev/null 2>&1 || true
          fi
          sleep 1

      - name: Assert E2E results
        run: node scripts/check-e2e-result.mjs

      - name: Upload E2E result on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: e2e-debug
          path: |
            e2e-result.json
            e2e-llm-result.json
            agent-output.log
            gateway.log
            mock-opik.log
            mock-llm.log
          if-no-files-found: ignore
71 changes: 71 additions & 0 deletions scripts/check-e2e-result.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env node
/**
* Reads the E2E result file written by mock-opik-server.mjs and exits non-zero
* if the minimum expected trace/span counts were not met.
*/

import fs from "node:fs";

const RESULT_FILE = process.env.E2E_RESULT_FILE ?? "e2e-result.json";
const LLM_RESULT_FILE = process.env.E2E_LLM_RESULT_FILE ?? "e2e-llm-result.json";

if (!fs.existsSync(RESULT_FILE)) {
console.error(`[check-e2e] FAIL: result file not found: ${RESULT_FILE}`);
console.error(" The mock Opik server may not have written its result (SIGTERM not received?).");
process.exit(1);
}

const result = JSON.parse(fs.readFileSync(RESULT_FILE, "utf8"));
console.log("[check-e2e] result:", result);

if (!fs.existsSync(LLM_RESULT_FILE)) {
console.error(`[check-e2e] FAIL: LLM result file not found: ${LLM_RESULT_FILE}`);
console.error(" The mock LLM server may not have written its result (SIGTERM not received?).");
process.exit(1);
}

const llmResult = JSON.parse(fs.readFileSync(LLM_RESULT_FILE, "utf8"));
console.log("[check-e2e] llm result:", llmResult);

const failures = [];
const llmGenerationRequests = (llmResult.responses ?? 0) + (llmResult.chatCompletions ?? 0);
const traceFinalizations = (result.tracePatches ?? 0) + (result.endedTraces ?? 0);
const spanFinalizations = (result.spanPatches ?? 0) + (result.endedSpans ?? 0);

if (result.traces < 1) {
failures.push(`Expected ≥1 trace batch, got ${result.traces}`);
}

if (result.spans < 1) {
failures.push(`Expected ≥1 span batch, got ${result.spans}`);
}

if (traceFinalizations < 1) {
failures.push(
`Expected ≥1 finalized trace (patch or batch endTime), got patches=${result.tracePatches ?? 0} ended=${result.endedTraces ?? 0}`,
);
}

if (spanFinalizations < 1) {
failures.push(
`Expected ≥1 finalized span (patch or batch endTime), got patches=${result.spanPatches ?? 0} ended=${result.endedSpans ?? 0}`,
);
}

if (result.totalRequests < 1) {
failures.push("No requests at all reached the mock Opik server — plugin hooks may not have fired");
}

if (llmGenerationRequests < 1) {
failures.push(
`Expected ≥1 mock LLM generation request, got ${llmGenerationRequests}`,
);
}

if (failures.length > 0) {
console.error("[check-e2e] FAIL:");
for (const f of failures) console.error(" •", f);
process.exit(1);
}

console.log("[check-e2e] PASS — traces, spans, patches, and mock LLM traffic were observed");
Loading
Loading