diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 0000000..5c36863 --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,232 @@ +name: E2E Integration + +# Validates that the plugin works end-to-end against the latest published +# OpenClaw release. Installs OpenClaw fresh, builds and installs the plugin +# from source, starts the gateway with a mock LLM + mock Opik server, runs +# a real agent turn, and asserts that traces and spans were exported. +# +# This catches regressions caused by OpenClaw plugin lifecycle changes that +# unit tests cannot detect. + +permissions: + contents: read + +on: + push: + branches: [main] + paths: + - ".github/workflows/e2e.yml" + - "index.ts" + - "src/**" + - "scripts/**" + - "package.json" + pull_request: + branches: [main] + paths: + - ".github/workflows/e2e.yml" + - "index.ts" + - "src/**" + - "scripts/**" + - "package.json" + schedule: + - cron: '0 6 * * *' # daily at 6am UTC — catches new OpenClaw releases breaking the plugin + workflow_dispatch: + +jobs: + e2e: + name: E2E against OpenClaw ${{ matrix.openclaw-version }} + runs-on: ubuntu-latest + timeout-minutes: 15 + + strategy: + fail-fast: false + matrix: + openclaw-version: + - latest + + env: + E2E_RESULT_FILE: e2e-result.json + E2E_LLM_RESULT_FILE: e2e-llm-result.json + OPENCLAW_GATEWAY_TOKEN: e2e-test-token + + steps: + - name: Checkout plugin source + uses: actions/checkout@v5 + + - name: Setup Node.js + uses: actions/setup-node@v5 + with: + node-version: "22.x" + cache: "npm" + + - name: Setup npm + run: npm install -g npm@11.6.2 + + - name: Setup npm global prefix for caching + run: | + npm config set prefix ~/.npm-global + echo "$HOME/.npm-global/bin" >> $GITHUB_PATH + + - name: Cache OpenClaw install + uses: actions/cache@v4 + with: + path: ~/.npm-global + key: openclaw-${{ matrix.openclaw-version }}-${{ runner.os }}-${{ runner.arch }} + + - name: Install OpenClaw (${{ matrix.openclaw-version }}) + run: npm install -g 
openclaw@${{ matrix.openclaw-version }} + + - name: Print OpenClaw version + run: openclaw --version + + - name: Install plugin dependencies + run: npm ci + + - name: Build plugin tarball + run: npm pack + + - name: Write OpenClaw config + run: | + mkdir -p ~/.openclaw/agents/main/sessions + cat > ~/.openclaw/openclaw.json << 'EOF' + { + "gateway": { + "mode": "local", + "bind": "loopback", + "auth": { "mode": "token", "token": "e2e-test-token" }, + "port": 18789 + }, + "agents": { + "defaults": { + "model": { + "primary": "mock-openai/gpt-4o-mini" + } + } + }, + "models": { + "mode": "merge", + "providers": { + "mock-openai": { + "baseUrl": "http://127.0.0.1:18790/v1", + "apiKey": "mock-key", + "authHeader": true, + "api": "openai-responses", + "models": [ + { + "id": "gpt-4o-mini", + "name": "Mock GPT-4o Mini", + "reasoning": false, + "input": ["text"], + "cost": { + "input": 0, + "output": 0, + "cacheRead": 0, + "cacheWrite": 0 + }, + "contextWindow": 128000, + "maxTokens": 16384 + } + ] + } + } + }, + "plugins": { + "allow": ["opik-openclaw"], + "entries": { + "opik-openclaw": { + "enabled": true, + "config": { + "enabled": true, + "apiUrl": "http://127.0.0.1:18791", + "apiKey": "mock-key", + "projectName": "e2e-test", + "workspaceName": "default" + } + } + } + } + } + EOF + + - name: Install plugin from tarball + run: openclaw plugins install ./opik-opik-openclaw-*.tgz + + - name: Start mock Opik server + run: | + node scripts/mock-opik-server.mjs > mock-opik.log 2>&1 & + echo $! > mock-opik.pid + env: + MOCK_OPIK_PORT: "18791" + + - name: Start mock LLM server + run: | + node scripts/mock-llm-server.mjs > mock-llm.log 2>&1 & + echo $! 
> mock-llm.pid
+        env:
+          MOCK_LLM_PORT: "18790"
+
+      - name: Wait for mock servers to be ready
+        run: |
+          for i in $(seq 1 10); do
+            curl -sf http://127.0.0.1:18791/health > /dev/null 2>&1 && \
+              curl -sf http://127.0.0.1:18790/v1/models > /dev/null 2>&1 && break
+            sleep 1
+          done
+          # Fail the step loudly if the mocks never came up (mirrors the final
+          # `openclaw health` check in the gateway readiness step below).
+          curl -sf http://127.0.0.1:18791/health > /dev/null
+          curl -sf http://127.0.0.1:18790/v1/models > /dev/null
+
+      - name: Start OpenClaw gateway
+        run: |
+          openclaw gateway run > gateway.log 2>&1 &
+          echo $! > gateway.pid
+
+      - name: Wait for gateway to be ready
+        run: |
+          for i in $(seq 1 15); do
+            openclaw health > /dev/null 2>&1 && break
+            sleep 1
+          done
+          openclaw health
+
+      - name: Run agent turn
+        run: |
+          set -o pipefail
+          openclaw agent --agent main --message "ping" --deliver 2>&1 | tee agent-output.log
+          if grep -q "falling back to embedded" agent-output.log; then
+            echo "[e2e] FAIL: gateway turn fell back to embedded mode"
+            exit 1
+          fi
+        timeout-minutes: 2
+
+      - name: Stop gateway and flush traces
+        run: |
+          openclaw gateway stop || true
+          if [ -f gateway.pid ]; then
+            kill "$(cat gateway.pid)" > /dev/null 2>&1 || true
+          fi
+          sleep 2
+
+      - name: Stop mock servers and collect results
+        run: |
+          if [ -f mock-opik.pid ]; then
+            kill "$(cat mock-opik.pid)" > /dev/null 2>&1 || true
+          fi
+          if [ -f mock-llm.pid ]; then
+            kill "$(cat mock-llm.pid)" > /dev/null 2>&1 || true
+          fi
+          sleep 1
+
+      - name: Assert E2E results
+        run: node scripts/check-e2e-result.mjs
+
+      - name: Upload E2E result on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: e2e-debug
+          path: |
+            e2e-result.json
+            e2e-llm-result.json
+            agent-output.log
+            gateway.log
+            mock-opik.log
+            mock-llm.log
+          if-no-files-found: ignore
diff --git a/scripts/check-e2e-result.mjs b/scripts/check-e2e-result.mjs
new file mode 100644
index 0000000..aa844eb
--- /dev/null
+++ b/scripts/check-e2e-result.mjs
@@ -0,0 +1,71 @@
+#!/usr/bin/env node
+/**
+ * Reads the E2E result file written by mock-opik-server.mjs and exits non-zero
+ * if the minimum expected trace/span counts were not met.
+ */ + +import fs from "node:fs"; + +const RESULT_FILE = process.env.E2E_RESULT_FILE ?? "e2e-result.json"; +const LLM_RESULT_FILE = process.env.E2E_LLM_RESULT_FILE ?? "e2e-llm-result.json"; + +if (!fs.existsSync(RESULT_FILE)) { + console.error(`[check-e2e] FAIL: result file not found: ${RESULT_FILE}`); + console.error(" The mock Opik server may not have written its result (SIGTERM not received?)."); + process.exit(1); +} + +const result = JSON.parse(fs.readFileSync(RESULT_FILE, "utf8")); +console.log("[check-e2e] result:", result); + +if (!fs.existsSync(LLM_RESULT_FILE)) { + console.error(`[check-e2e] FAIL: LLM result file not found: ${LLM_RESULT_FILE}`); + console.error(" The mock LLM server may not have written its result (SIGTERM not received?)."); + process.exit(1); +} + +const llmResult = JSON.parse(fs.readFileSync(LLM_RESULT_FILE, "utf8")); +console.log("[check-e2e] llm result:", llmResult); + +const failures = []; +const llmGenerationRequests = (llmResult.responses ?? 0) + (llmResult.chatCompletions ?? 0); +const traceFinalizations = (result.tracePatches ?? 0) + (result.endedTraces ?? 0); +const spanFinalizations = (result.spanPatches ?? 0) + (result.endedSpans ?? 0); + +if (result.traces < 1) { + failures.push(`Expected ≥1 trace batch, got ${result.traces}`); +} + +if (result.spans < 1) { + failures.push(`Expected ≥1 span batch, got ${result.spans}`); +} + +if (traceFinalizations < 1) { + failures.push( + `Expected ≥1 finalized trace (patch or batch endTime), got patches=${result.tracePatches ?? 0} ended=${result.endedTraces ?? 0}`, + ); +} + +if (spanFinalizations < 1) { + failures.push( + `Expected ≥1 finalized span (patch or batch endTime), got patches=${result.spanPatches ?? 0} ended=${result.endedSpans ?? 
0}`, + ); +} + +if (result.totalRequests < 1) { + failures.push("No requests at all reached the mock Opik server — plugin hooks may not have fired"); +} + +if (llmGenerationRequests < 1) { + failures.push( + `Expected ≥1 mock LLM generation request, got ${llmGenerationRequests}`, + ); +} + +if (failures.length > 0) { + console.error("[check-e2e] FAIL:"); + for (const f of failures) console.error(" •", f); + process.exit(1); +} + +console.log("[check-e2e] PASS — traces, spans, patches, and mock LLM traffic were observed"); diff --git a/scripts/mock-llm-server.mjs b/scripts/mock-llm-server.mjs new file mode 100644 index 0000000..415a642 --- /dev/null +++ b/scripts/mock-llm-server.mjs @@ -0,0 +1,190 @@ +#!/usr/bin/env node +/** + * Minimal OpenAI-compatible mock LLM server for E2E tests. + * + * Returns a canned chat completion response so the OpenClaw gateway can + * complete a full agent turn (llm_input → llm_output → agent_end) without + * a real model API key. + * + * Supports both streaming (SSE) and non-streaming responses because OpenClaw + * may request either depending on config. + */ + +import http from "node:http"; +import fs from "node:fs"; + +const PORT = parseInt(process.env.MOCK_LLM_PORT ?? "18790", 10); +const MODEL = "gpt-4o-mini"; +const RESULT_FILE = process.env.E2E_LLM_RESULT_FILE ?? 
"e2e-llm-result.json"; +const RESPONSE_TEXT = "pong"; + +const received = { + models: 0, + responses: 0, + streamingResponses: 0, + chatCompletions: 0, + streamingChatCompletions: 0, +}; + +function nonStreamingResponse(model) { + return JSON.stringify({ + id: "chatcmpl-e2e-mock", + object: "chat.completion", + created: Math.floor(Date.now() / 1000), + model, + choices: [ + { + index: 0, + message: { role: "assistant", content: RESPONSE_TEXT }, + finish_reason: "stop", + }, + ], + usage: { prompt_tokens: 5, completion_tokens: 2, total_tokens: 7 }, + }); +} + +function streamingChunks(model) { + const id = "chatcmpl-e2e-mock"; + const created = Math.floor(Date.now() / 1000); + + const chunks = [ + { id, object: "chat.completion.chunk", created, model, choices: [{ index: 0, delta: { role: "assistant", content: "" }, finish_reason: null }] }, + { id, object: "chat.completion.chunk", created, model, choices: [{ index: 0, delta: { content: RESPONSE_TEXT }, finish_reason: null }] }, + { id, object: "chat.completion.chunk", created, model, choices: [{ index: 0, delta: {}, finish_reason: "stop" }] }, + ]; + + return chunks.map((c) => `data: ${JSON.stringify(c)}\n\n`).join("") + "data: [DONE]\n\n"; +} + +function responseObject(model) { + return { + id: "resp-e2e-mock", + object: "response", + created_at: Math.floor(Date.now() / 1000), + model, + status: "completed", + output: [ + { + id: "msg-e2e-mock", + type: "message", + role: "assistant", + content: [ + { + type: "output_text", + text: RESPONSE_TEXT, + annotations: [], + }, + ], + }, + ], + output_text: RESPONSE_TEXT, + usage: { input_tokens: 5, output_tokens: 2, total_tokens: 7 }, + }; +} + +function streamingResponseEvents(model) { + const response = responseObject(model); + const message = response.output[0]; + const part = message.content[0]; + + const events = [ + ["response.created", { type: "response.created", response: { ...response, status: "in_progress", output: [] } }], + ["response.in_progress", { type: 
"response.in_progress", response: { ...response, status: "in_progress", output: [] } }], + ["response.output_item.added", { type: "response.output_item.added", output_index: 0, item: { ...message, content: [] } }], + ["response.content_part.added", { type: "response.content_part.added", output_index: 0, item_id: message.id, content_index: 0, part: { type: "output_text", text: "" } }], + ["response.output_text.delta", { type: "response.output_text.delta", output_index: 0, item_id: message.id, content_index: 0, delta: RESPONSE_TEXT }], + ["response.output_text.done", { type: "response.output_text.done", output_index: 0, item_id: message.id, content_index: 0, text: RESPONSE_TEXT }], + ["response.content_part.done", { type: "response.content_part.done", output_index: 0, item_id: message.id, content_index: 0, part }], + ["response.output_item.done", { type: "response.output_item.done", output_index: 0, item: message }], + ["response.completed", { type: "response.completed", response }], + ]; + + return events + .map(([event, data]) => `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`) + .join("") + "data: [DONE]\n\n"; +} + +const server = http.createServer((req, res) => { + let raw = ""; + req.on("data", (chunk) => (raw += chunk)); + req.on("end", () => { + console.error(`[mock-llm] ${req.method} ${req.url}`); + + if (req.url === "/v1/models" && req.method === "GET") { + received.models += 1; + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ object: "list", data: [{ id: MODEL, object: "model" }] })); + return; + } + + if (req.url === "/v1/chat/completions" && req.method === "POST") { + let body = {}; + try { body = JSON.parse(raw); } catch { /* ignore */ } + + const wantsStream = body.stream === true; + received.chatCompletions += 1; + if (wantsStream) { + received.streamingChatCompletions += 1; + } + + if (wantsStream) { + res.writeHead(200, { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + Connection: 
"keep-alive", + }); + res.end(streamingChunks(body.model ?? MODEL)); + } else { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(nonStreamingResponse(body.model ?? MODEL)); + } + return; + } + + if (req.url === "/v1/responses" && req.method === "POST") { + let body = {}; + try { body = JSON.parse(raw); } catch { /* ignore */ } + + const wantsStream = body.stream === true; + received.responses += 1; + if (wantsStream) { + received.streamingResponses += 1; + } + + if (wantsStream) { + res.writeHead(200, { + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + Connection: "keep-alive", + }); + res.end(streamingResponseEvents(body.model ?? MODEL)); + } else { + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify(responseObject(body.model ?? MODEL))); + } + return; + } + + res.writeHead(404, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ error: { message: "not found", type: "invalid_request_error" } })); + }); +}); + +server.listen(PORT, "127.0.0.1", () => { + console.error(`[mock-llm] listening on http://127.0.0.1:${PORT}`); +}); + +function writeResult() { + fs.writeFileSync(RESULT_FILE, JSON.stringify(received, null, 2)); + console.error(`[mock-llm] result written to ${RESULT_FILE}:`, received); +} + +process.on("SIGTERM", () => { + writeResult(); + server.close(() => process.exit(0)); +}); + +process.on("SIGINT", () => { + writeResult(); + server.close(() => process.exit(0)); +}); diff --git a/scripts/mock-opik-server.mjs b/scripts/mock-opik-server.mjs new file mode 100644 index 0000000..bc2833f --- /dev/null +++ b/scripts/mock-opik-server.mjs @@ -0,0 +1,107 @@ +#!/usr/bin/env node +/** + * Mock Opik API server for E2E tests. + * + * Accepts the Opik trace/span batch and patch endpoints and records every + * payload it receives. On SIGTERM (or when the gateway flushes and stops), + * it writes a summary to E2E_RESULT_FILE (default: e2e-result.json) and exits. 
+ *
+ * The check-e2e-result.mjs script reads that file and fails the test if
+ * no traces or spans were received.
+ */
+
+import http from "node:http";
+import fs from "node:fs";
+
+const PORT = parseInt(process.env.MOCK_OPIK_PORT ?? "18791", 10);
+const RESULT_FILE = process.env.E2E_RESULT_FILE ?? "e2e-result.json";
+
+const received = {
+  traces: 0,
+  spans: 0,
+  endedTraces: 0,
+  endedSpans: 0,
+  tracePatches: 0,
+  spanPatches: 0,
+  requests: [],
+};
+
+function record(method, url, body) {
+  console.error(`[mock-opik] ${method} ${url}`);
+  received.requests.push({ method, url, bodyLength: JSON.stringify(body).length });
+
+  if (method === "POST" && url.includes("/traces/batch")) {
+    const traces = body?.traces ?? [];
+    received.traces += traces.length;
+    received.endedTraces += traces.filter((trace) => trace?.endTime !== undefined || trace?.end_time !== undefined).length;
+  } else if (method === "POST" && url.includes("/spans/batch")) {
+    const spans = body?.spans ?? [];
+    received.spans += spans.length;
+    received.endedSpans += spans.filter((span) => span?.endTime !== undefined || span?.end_time !== undefined).length;
+  } else if (method === "PATCH" && url.match(/\/traces\/[^/]+$/)) {
+    received.tracePatches += 1;
+  } else if (method === "PATCH" && url.match(/\/spans\/[^/]+$/)) {
+    received.spanPatches += 1;
+  }
+}
+
+const server = http.createServer((req, res) => {
+  let raw = "";
+  req.on("data", (chunk) => (raw += chunk));
+  req.on("end", () => {
+    let body = {};
+    try {
+      body = JSON.parse(raw || "{}");
+    } catch {
+      // ignore parse errors for non-JSON bodies
+    }
+
+    record(req.method, req.url, body);
+
+    // Respond 200/204 to everything so the plugin doesn't retry.
+    const status =
+      req.method === "GET" ? 200 : req.method === "DELETE" ? 204 : 200;
+
+    res.writeHead(status, { "Content-Type": "application/json" });
+
+    // Return minimal responses for the endpoints Opik SDK reads back.
+ if (req.url?.includes("/projects") && req.method === "GET") { + res.end(JSON.stringify({ content: [{ id: "mock-project-id", name: "e2e-test" }] })); + } else if (req.url?.includes("/traces/batch") && req.method === "POST") { + res.end(JSON.stringify({})); + } else if (req.url?.includes("/spans/batch") && req.method === "POST") { + res.end(JSON.stringify({})); + } else { + res.end(JSON.stringify({})); + } + }); +}); + +server.listen(PORT, "127.0.0.1", () => { + console.error(`[mock-opik] listening on http://127.0.0.1:${PORT}`); +}); + +function writeResult() { + const summary = { + traces: received.traces, + spans: received.spans, + endedTraces: received.endedTraces, + endedSpans: received.endedSpans, + tracePatches: received.tracePatches, + spanPatches: received.spanPatches, + totalRequests: received.requests.length, + }; + fs.writeFileSync(RESULT_FILE, JSON.stringify(summary, null, 2)); + console.error(`[mock-opik] result written to ${RESULT_FILE}:`, summary); +} + +process.on("SIGTERM", () => { + writeResult(); + server.close(() => process.exit(0)); +}); + +process.on("SIGINT", () => { + writeResult(); + server.close(() => process.exit(0)); +});