Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
232 changes: 232 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
name: E2E Integration

# Validates that the plugin works end-to-end against the latest published
# OpenClaw release. Installs OpenClaw fresh, builds and installs the plugin
# from source, starts the gateway with a mock LLM + mock Opik server, runs
# a real agent turn, and asserts that traces and spans were exported.
#
# This catches regressions caused by OpenClaw plugin lifecycle changes that
# unit tests cannot detect.

permissions:
  contents: read

on:
  push:
    branches: [main]
    paths:
      - ".github/workflows/e2e.yml"
      - "index.ts"
      - "src/**"
      - "scripts/**"
      - "package.json"
  pull_request:
    branches: [main]
    paths:
      - ".github/workflows/e2e.yml"
      - "index.ts"
      - "src/**"
      - "scripts/**"
      - "package.json"
  schedule:
    - cron: '0 6 * * *' # daily at 6am UTC — catches new OpenClaw releases breaking the plugin
  workflow_dispatch:

jobs:
  e2e:
    name: E2E against OpenClaw ${{ matrix.openclaw-version }}
    runs-on: ubuntu-latest
    timeout-minutes: 15

    strategy:
      fail-fast: false
      matrix:
        openclaw-version:
          - latest

    env:
      # Result files are written by the mock servers on shutdown and read by
      # scripts/check-e2e-result.mjs in the assertion step.
      E2E_RESULT_FILE: e2e-result.json
      E2E_LLM_RESULT_FILE: e2e-llm-result.json
      # Must match gateway.auth.token in the config written below.
      OPENCLAW_GATEWAY_TOKEN: e2e-test-token

    steps:
      - name: Checkout plugin source
        uses: actions/checkout@v5

      - name: Setup Node.js
        uses: actions/setup-node@v5
        with:
          node-version: "22.x"
          cache: "npm"

      - name: Setup npm
        run: npm install -g npm@11.6.2

      # Global installs go to a user-writable prefix so the cache step below
      # can restore the OpenClaw install without root.
      - name: Setup npm global prefix for caching
        run: |
          npm config set prefix ~/.npm-global
          echo "$HOME/.npm-global/bin" >> $GITHUB_PATH

      - name: Cache OpenClaw install
        uses: actions/cache@v4
        with:
          path: ~/.npm-global
          key: openclaw-${{ matrix.openclaw-version }}-${{ runner.os }}-${{ runner.arch }}

      - name: Install OpenClaw (${{ matrix.openclaw-version }})
        run: npm install -g openclaw@${{ matrix.openclaw-version }}

      - name: Print OpenClaw version
        run: openclaw --version

      - name: Install plugin dependencies
        run: npm ci

      - name: Build plugin tarball
        run: npm pack

      # Config points the agent at the mock LLM (port 18790) and the plugin at
      # the mock Opik server (port 18791). The heredoc is quoted so nothing is
      # shell-expanded.
      - name: Write OpenClaw config
        run: |
          mkdir -p ~/.openclaw/agents/main/sessions
          cat > ~/.openclaw/openclaw.json << 'EOF'
          {
            "gateway": {
              "mode": "local",
              "bind": "loopback",
              "auth": { "mode": "token", "token": "e2e-test-token" },
              "port": 18789
            },
            "agents": {
              "defaults": {
                "model": {
                  "primary": "mock-openai/gpt-4o-mini"
                }
              }
            },
            "models": {
              "mode": "merge",
              "providers": {
                "mock-openai": {
                  "baseUrl": "http://127.0.0.1:18790/v1",
                  "apiKey": "mock-key",
                  "authHeader": true,
                  "api": "openai-responses",
                  "models": [
                    {
                      "id": "gpt-4o-mini",
                      "name": "Mock GPT-4o Mini",
                      "reasoning": false,
                      "input": ["text"],
                      "cost": {
                        "input": 0,
                        "output": 0,
                        "cacheRead": 0,
                        "cacheWrite": 0
                      },
                      "contextWindow": 128000,
                      "maxTokens": 16384
                    }
                  ]
                }
              }
            },
            "plugins": {
              "allow": ["opik-openclaw"],
              "entries": {
                "opik-openclaw": {
                  "enabled": true,
                  "config": {
                    "enabled": true,
                    "apiUrl": "http://127.0.0.1:18791",
                    "apiKey": "mock-key",
                    "projectName": "e2e-test",
                    "workspaceName": "default"
                  }
                }
              }
            }
          }
          EOF

      - name: Install plugin from tarball
        run: openclaw plugins install ./opik-opik-openclaw-*.tgz

      - name: Start mock Opik server
        run: |
          node scripts/mock-opik-server.mjs > mock-opik.log 2>&1 &
          echo $! > mock-opik.pid
        env:
          MOCK_OPIK_PORT: "18791"

      - name: Start mock LLM server
        run: |
          node scripts/mock-llm-server.mjs > mock-llm.log 2>&1 &
          echo $! > mock-llm.pid
        env:
          MOCK_LLM_PORT: "18790"

      # Poll both mock servers, then hard-fail if either is still down —
      # otherwise a dead mock server only surfaces as a confusing agent-turn
      # failure later. (Mirrors the final `openclaw health` in the gateway
      # wait step below.)
      - name: Wait for mock servers to be ready
        run: |
          for i in $(seq 1 10); do
            curl -sf http://127.0.0.1:18791/health > /dev/null 2>&1 && \
            curl -sf http://127.0.0.1:18790/v1/models > /dev/null 2>&1 && break
            sleep 1
          done
          curl -sf http://127.0.0.1:18791/health > /dev/null
          curl -sf http://127.0.0.1:18790/v1/models > /dev/null

      - name: Start OpenClaw gateway
        run: |
          openclaw gateway run > gateway.log 2>&1 &
          echo $! > gateway.pid

      - name: Wait for gateway to be ready
        run: |
          for i in $(seq 1 15); do
            openclaw health > /dev/null 2>&1 && break
            sleep 1
          done
          openclaw health

      # A fallback to embedded mode means the gateway turn silently degraded;
      # treat it as a hard failure even though the command itself succeeded.
      - name: Run agent turn
        run: |
          set -o pipefail
          openclaw agent --agent main --message "ping" --deliver 2>&1 | tee agent-output.log
          if grep -q "falling back to embedded" agent-output.log; then
            echo "[e2e] FAIL: gateway turn fell back to embedded mode"
            exit 1
          fi
        timeout-minutes: 2

      # Graceful stop first so the plugin can flush pending traces; the PID
      # kill is a fallback if the CLI stop fails.
      - name: Stop gateway and flush traces
        run: |
          openclaw gateway stop || true
          if [ -f gateway.pid ]; then
            kill "$(cat gateway.pid)" > /dev/null 2>&1 || true
          fi
          sleep 2

      # SIGTERM prompts the mock servers to write their result files.
      - name: Stop mock servers and collect results
        run: |
          if [ -f mock-opik.pid ]; then
            kill "$(cat mock-opik.pid)" > /dev/null 2>&1 || true
          fi
          if [ -f mock-llm.pid ]; then
            kill "$(cat mock-llm.pid)" > /dev/null 2>&1 || true
          fi
          sleep 1

      - name: Assert E2E results
        run: node scripts/check-e2e-result.mjs

      - name: Upload E2E result on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: e2e-debug
          path: |
            e2e-result.json
            e2e-llm-result.json
            agent-output.log
            gateway.log
            mock-opik.log
            mock-llm.log
          if-no-files-found: ignore
71 changes: 71 additions & 0 deletions scripts/check-e2e-result.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env node
/**
* Reads the E2E result file written by mock-opik-server.mjs and exits non-zero
* if the minimum expected trace/span counts were not met.
*/

import fs from "node:fs";

const RESULT_FILE = process.env.E2E_RESULT_FILE ?? "e2e-result.json";
const LLM_RESULT_FILE = process.env.E2E_LLM_RESULT_FILE ?? "e2e-llm-result.json";

if (!fs.existsSync(RESULT_FILE)) {
console.error(`[check-e2e] FAIL: result file not found: ${RESULT_FILE}`);
console.error(" The mock Opik server may not have written its result (SIGTERM not received?).");
process.exit(1);
}

const result = JSON.parse(fs.readFileSync(RESULT_FILE, "utf8"));
console.log("[check-e2e] result:", result);

if (!fs.existsSync(LLM_RESULT_FILE)) {
console.error(`[check-e2e] FAIL: LLM result file not found: ${LLM_RESULT_FILE}`);
console.error(" The mock LLM server may not have written its result (SIGTERM not received?).");
process.exit(1);
}

const llmResult = JSON.parse(fs.readFileSync(LLM_RESULT_FILE, "utf8"));
console.log("[check-e2e] llm result:", llmResult);

const failures = [];
const llmGenerationRequests = (llmResult.responses ?? 0) + (llmResult.chatCompletions ?? 0);
const traceFinalizations = (result.tracePatches ?? 0) + (result.endedTraces ?? 0);
const spanFinalizations = (result.spanPatches ?? 0) + (result.endedSpans ?? 0);

if (result.traces < 1) {
failures.push(`Expected ≥1 trace batch, got ${result.traces}`);
}

if (result.spans < 1) {
failures.push(`Expected ≥1 span batch, got ${result.spans}`);
}

if (traceFinalizations < 1) {
failures.push(
`Expected ≥1 finalized trace (patch or batch endTime), got patches=${result.tracePatches ?? 0} ended=${result.endedTraces ?? 0}`,
);
}

if (spanFinalizations < 1) {
failures.push(
`Expected ≥1 finalized span (patch or batch endTime), got patches=${result.spanPatches ?? 0} ended=${result.endedSpans ?? 0}`,
);
}

if (result.totalRequests < 1) {
failures.push("No requests at all reached the mock Opik server — plugin hooks may not have fired");
}

if (llmGenerationRequests < 1) {
failures.push(
`Expected ≥1 mock LLM generation request, got ${llmGenerationRequests}`,
);
}

if (failures.length > 0) {
console.error("[check-e2e] FAIL:");
for (const f of failures) console.error(" •", f);
process.exit(1);
}

console.log("[check-e2e] PASS — traces, spans, patches, and mock LLM traffic were observed");
Loading
Loading