-
Notifications
You must be signed in to change notification settings - Fork 354
Expand file tree
/
Copy pathstart_mcp_gateway.sh
More file actions
executable file
·469 lines (418 loc) · 17.3 KB
/
start_mcp_gateway.sh
File metadata and controls
executable file
·469 lines (418 loc) · 17.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
#!/usr/bin/env bash
# Start MCP Gateway
# This script starts the MCP gateway process that proxies MCP servers through a unified HTTP endpoint
# Following the MCP Gateway Specification: https://github.com/github/gh-aw/blob/main/docs/src/content/docs/reference/mcp-gateway.md
# Per MCP Gateway Specification v1.0.0: Only container-based execution is supported.
#
# This script reads the MCP configuration from stdin and pipes it to the gateway container.
#
# Input (stdin):
#   JSON document with a 'gateway' section containing 'port', 'domain' and 'apiKey'.
# Environment variables read by this script:
#   MCP_GATEWAY_DOCKER_COMMAND  full "docker run ..." command used to launch the gateway
#   MCP_GATEWAY_API_KEY         API key handed to the converter and check scripts
#   MCP_GATEWAY_PORT            port used for the localhost health check and reported URLs
#   MCP_GATEWAY_DOMAIN          host name reported for access from containers
#   GH_AW_ENGINE                optional; selects the agent-specific converter
#   RUNNER_TEMP                 base directory of the converter/check helper scripts
#   GITHUB_OUTPUT               file that receives gateway-pid, gateway-port, gateway-api-key
# On success the gateway is left running in the background; on failure the script exits 1.
#
# Abort on the first command that fails outside of a condition.
set -e
# Restrict default file creation mode to owner-only (rw-------) for all new files.
# This prevents the race window between file creation via output redirection and
# a subsequent chmod, which would leave credential-bearing files world-readable
# (mode 0644) with a typical umask of 022.
umask 077
# Timing helper
#######################################
# Print how long a step took.
# Arguments:
#   $1 - start timestamp in epoch milliseconds (as produced by `date +%s%3N`)
#   $2 - human-readable name of the step
# Outputs:
#   One "TIMING" line on stdout with the elapsed milliseconds.
#######################################
print_timing() {
  local began_ms=$1 step_name=$2
  # Elapsed time is "now" minus the caller-supplied start, both in milliseconds.
  echo "⏱️ TIMING: $step_name took $(($(date +%s%3N) - began_ms))ms"
}
# Environment contract checked up front:
# - MCP_GATEWAY_DOCKER_COMMAND: Container image to run (required)
# - MCP_GATEWAY_API_KEY: API key for gateway authentication (required for converter scripts)
#
# Only the container command is verified here; running the gateway as a plain
# command is not supported per spec, so an empty value is a hard error.
[ -n "$MCP_GATEWAY_DOCKER_COMMAND" ] || {
  echo "ERROR: MCP_GATEWAY_DOCKER_COMMAND must be set (command-based execution is not supported per MCP Gateway Specification v1.0.0)"
  exit 1
}
# Prepare the working directories under /tmp/gh-aw:
#   mcp-logs   - gateway stderr and diagnostic logs
#   mcp-config - gateway output and agent configuration (bearer tokens, API keys)
#
# Guard against symlink attacks on these predictable /tmp paths.
# An attacker who can create files in /tmp could pre-create /tmp/gh-aw or one of
# its subdirectories as a symlink and redirect our writes to an arbitrary
# location. Every path component is checked before and after creation.
#
# The parent is checked before anything is created beneath it, so mkdir never
# follows an attacker-controlled /tmp/gh-aw symlink.
if [ -L /tmp/gh-aw ]; then
  echo "ERROR: /tmp/gh-aw is a symlink — possible symlink attack, aborting"
  exit 1
fi
for guarded_dir in /tmp/gh-aw/mcp-logs /tmp/gh-aw/mcp-config; do
  if [ -L "$guarded_dir" ]; then
    echo "ERROR: $guarded_dir is a symlink — possible symlink attack, aborting"
    exit 1
  fi
done
mkdir -p /tmp/gh-aw/mcp-logs /tmp/gh-aw/mcp-config
# Post-creation check: verify no path was replaced with a symlink after mkdir
# (mitigates the TOCTOU window between the pre-check and mkdir). The message names
# the path that actually failed the check.
for guarded_dir in /tmp/gh-aw /tmp/gh-aw/mcp-logs /tmp/gh-aw/mcp-config; do
  if [ -L "$guarded_dir" ]; then
    echo "ERROR: $guarded_dir was replaced with a symlink — possible symlink attack, aborting"
    exit 1
  fi
done
# Restrict directory permissions so only the runner process owner can read config files
# (which contain bearer tokens and API keys)
chmod 700 /tmp/gh-aw/mcp-config
# Validate container syntax first (before accessing files)
# Container should be a valid docker command starting with "docker run"
#
# Each required flag is matched as a whole whitespace-delimited token. A plain
# substring search would also accept look-alikes: "--init" or any value that
# merely contains "-i" for the interactive flag, or "--network-alias" for --network.
docker_run_re='^docker run[[:space:]]'
# -i on its own, inside a short-flag cluster such as -it, or the long form.
interactive_flag_re='(^|[[:space:]])(-[[:alpha:]]*i[[:alpha:]]*|--interactive(=true)?)([[:space:]]|$)'
rm_flag_re='(^|[[:space:]])--rm(=true)?([[:space:]]|$)'
# --network must be followed by a value, either "--network=NAME" or "--network NAME".
network_flag_re='(^|[[:space:]])--network(=|[[:space:]]+)[^[:space:]]'

if [[ ! "$MCP_GATEWAY_DOCKER_COMMAND" =~ $docker_run_re ]]; then
  echo "ERROR: MCP_GATEWAY_DOCKER_COMMAND has incorrect syntax"
  echo "Expected: docker run command with image and arguments"
  echo "Got: $MCP_GATEWAY_DOCKER_COMMAND"
  exit 1
fi
# Validate container command includes required flags
if [[ ! "$MCP_GATEWAY_DOCKER_COMMAND" =~ $interactive_flag_re ]]; then
  echo "ERROR: MCP_GATEWAY_DOCKER_COMMAND must include -i flag for interactive mode"
  exit 1
fi
if [[ ! "$MCP_GATEWAY_DOCKER_COMMAND" =~ $rm_flag_re ]]; then
  echo "ERROR: MCP_GATEWAY_DOCKER_COMMAND must include --rm flag for cleanup"
  exit 1
fi
if [[ ! "$MCP_GATEWAY_DOCKER_COMMAND" =~ $network_flag_re ]]; then
  echo "ERROR: MCP_GATEWAY_DOCKER_COMMAND must include --network flag for networking"
  exit 1
fi
# Reference point for the "Overall gateway startup" timing printed at the end of the script
SCRIPT_START_TIME=$(date +%s%3N)

# Slurp the complete MCP configuration document from stdin into MCP_CONFIG
echo "Reading MCP configuration from stdin..."
CONFIG_READ_START=$(date +%s%3N)
MCP_CONFIG=$(cat)
print_timing "$CONFIG_READ_START" "Configuration read from stdin"
echo ""

# Echo the configuration verbatim between two banner lines to aid debugging.
# NOTE(review): this prints the whole document, including gateway.apiKey, to the
# job log — confirm that the value is masked by the caller.
printf '%s\n' "-------START MCP CONFIG-----------"
echo "$MCP_CONFIG"
printf '%s\n' "-------END MCP CONFIG-----------" ""
# Step 1: the configuration must parse as JSON. jq's diagnostics are captured in a
# log file so they can be replayed together with the first 50 lines of the input.
CONFIG_VALIDATION_START=$(date +%s%3N)
jq_error_log=/tmp/gh-aw/mcp-config/jq-error.log
if ! echo "$MCP_CONFIG" | jq empty 2>"$jq_error_log"; then
  echo "ERROR: Configuration is not valid JSON"
  echo ""
  echo "JSON validation error:"
  if [ -f "$jq_error_log" ]; then
    cat "$jq_error_log"
  fi
  echo ""
  echo "Configuration content:"
  echo "$MCP_CONFIG" | head -50
  config_line_count=$(echo "$MCP_CONFIG" | wc -l)
  if (( config_line_count > 50 )); then
    echo "... (truncated, showing first 50 lines)"
  fi
  exit 1
fi

# Step 2: the 'gateway' section and each of its required fields must be present
# (jq -e fails when the selected value is missing, null or false).
echo "Validating gateway configuration..."
if ! echo "$MCP_CONFIG" | jq -e '.gateway' >/dev/null 2>&1; then
  echo "ERROR: Configuration is missing required 'gateway' section"
  echo "Per MCP Gateway Specification v1.0.0 section 4.1.3, the gateway section is required"
  exit 1
fi
for required_field in port domain apiKey; do
  if ! echo "$MCP_CONFIG" | jq -e ".gateway.${required_field}" >/dev/null 2>&1; then
    echo "ERROR: Gateway configuration is missing required '${required_field}' field"
    exit 1
  fi
done

echo "Configuration validated successfully"
print_timing "$CONFIG_VALIDATION_START" "Configuration validation"
echo ""
# Set MCP_GATEWAY_LOG_DIR environment variable for use by the gateway.
# It is exported, so every child process started from here on inherits it.
export MCP_GATEWAY_LOG_DIR="/tmp/gh-aw/mcp-logs/"
# Start gateway process with container
echo "Starting gateway with container: $MCP_GATEWAY_DOCKER_COMMAND"
echo "Full docker command: $MCP_GATEWAY_DOCKER_COMMAND"
echo ""
GATEWAY_START_TIME=$(date +%s%3N)
# Note: MCP_GATEWAY_DOCKER_COMMAND is the full docker command with all flags, mounts, and image.
# It is expanded unquoted on purpose so the shell word-splits it into the command and
# its arguments; quote characters inside the value are therefore not interpreted.
# MCP_GATEWAY_LOG_DIR is placed in the environment of the docker client process only.
# NOTE(review): it reaches the container only if the docker command itself forwards it
# (e.g. with "-e MCP_GATEWAY_LOG_DIR") — confirm against the code that builds the command.
# The configuration is piped to the gateway's stdin; the gateway's stdout is captured
# in gateway-output.json and its stderr in stderr.log. The pipeline runs in the
# background, so $! below is the PID of its last process, the docker command.
echo "$MCP_CONFIG" | MCP_GATEWAY_LOG_DIR="$MCP_GATEWAY_LOG_DIR" $MCP_GATEWAY_DOCKER_COMMAND \
> /tmp/gh-aw/mcp-config/gateway-output.json 2> /tmp/gh-aw/mcp-logs/stderr.log &
GATEWAY_PID=$!
echo "Gateway started with PID: $GATEWAY_PID"
print_timing $GATEWAY_START_TIME "Gateway container launch"
# Immediate liveness probe: catches a command that fails straight away and dumps
# whatever it wrote to stdout/stderr before giving up.
echo "Verifying gateway process is running..."
if ps -p $GATEWAY_PID > /dev/null 2>&1; then
echo "Gateway process confirmed running (PID: $GATEWAY_PID)"
else
echo "ERROR: Gateway process exited immediately after start"
echo ""
echo "Gateway stdout output:"
cat /tmp/gh-aw/mcp-config/gateway-output.json 2>/dev/null || echo "No stdout output available"
echo ""
echo "Gateway stderr logs:"
cat /tmp/gh-aw/mcp-logs/stderr.log 2>/dev/null || echo "No stderr logs available"
exit 1
fi
echo ""
# Wait a few seconds for gateway to initialize before checking health
# This helps catch early failures before curl retries start
echo "Waiting for gateway to initialize..."
sleep 5
echo "Checking if gateway process is still alive after initialization..."
if ! ps -p "$GATEWAY_PID" > /dev/null 2>&1; then
  echo "ERROR: Gateway process (PID: $GATEWAY_PID) exited during initialization"
  # Reap the gateway in THIS shell. `wait` inside $(...) runs in a subshell where the
  # gateway is not a child, so it would always report 127 instead of the real status.
  # The `||` keeps a non-zero status from tripping `set -e`.
  WAIT_STATUS=0
  wait "$GATEWAY_PID" 2>/dev/null || WAIT_STATUS=$?
  echo "Gateway exit status: $WAIT_STATUS"
  echo ""
  echo "Gateway stdout (errors are written here per MCP Gateway Specification):"
  cat /tmp/gh-aw/mcp-config/gateway-output.json 2>/dev/null || echo "No stdout output available"
  echo ""
  echo "Gateway stderr logs (debug output):"
  cat /tmp/gh-aw/mcp-logs/stderr.log 2>/dev/null || echo "No stderr logs available"
  exit 1
fi
echo "Gateway process is still running (PID: $GATEWAY_PID)"
echo ""
# Wait for gateway to be ready using /health endpoint
# Note: Gateway may take 40-50 seconds when starting multiple MCP servers
# (e.g., serena alone takes ~22 seconds to start)
echo "Waiting for gateway to be ready..."
HEALTH_CHECK_START=$(date +%s%3N)
# Use localhost for health check since:
# 1. This script runs on the host (not in a container)
# 2. The gateway uses --network host, so it's accessible on localhost
# Note: MCP_GATEWAY_DOMAIN may be set to host.docker.internal for use by containers,
# but the health check should always use localhost since we're running on the host.
HEALTH_CHECK_HOST="localhost"
# Without a port the URL would degrade to "http://localhost:/health" and the loop
# below would probe the wrong port for its whole retry budget, so fail fast instead.
# The gateway is already running, so stop it before aborting.
if [ -z "${MCP_GATEWAY_PORT:-}" ]; then
  echo "ERROR: MCP_GATEWAY_PORT must be set to check the gateway health endpoint"
  kill "$GATEWAY_PID" 2>/dev/null || true
  exit 1
fi
HEALTH_URL="http://${HEALTH_CHECK_HOST}:${MCP_GATEWAY_PORT}/health"
MAX_RETRIES=120
RETRY_DELAY=1
echo "Health endpoint: ${HEALTH_URL}"
echo "(Note: MCP_GATEWAY_DOMAIN is '${MCP_GATEWAY_DOMAIN}' for container access)"
echo "Retrying up to ${MAX_RETRIES} times with ${RETRY_DELAY}s delay ($((MAX_RETRIES * RETRY_DELAY))s total timeout)"
echo ""
# Check health endpoint using localhost (since we're running on the host)
# Per MCP Gateway Specification v1.3.0, /health must return HTTP 200 with JSON body containing specVersion and gatewayVersion
# Custom retry loop with progress indication
# Note: Disable errexit temporarily to capture curl exit code
set +e
RETRY_COUNT=0
HTTP_CODE=""
HEALTH_RESPONSE=""
CURL_EXIT_CODE=1
echo "=== Health Check Progress ==="
while [ "$RETRY_COUNT" -lt "$MAX_RETRIES" ]; do
  RETRY_COUNT=$((RETRY_COUNT + 1))
  # Calculate elapsed time since health check started
  ELAPSED_MS=$(($(date +%s%3N) - HEALTH_CHECK_START))
  ELAPSED_SEC=$((ELAPSED_MS / 1000))
  # Show progress on attempts 1, 11, 21, ... (the first attempt and every tenth after it)
  if [ $((RETRY_COUNT % 10)) -eq 1 ]; then
    echo "Attempt $RETRY_COUNT/$MAX_RETRIES (${ELAPSED_SEC}s elapsed)..."
  fi
  # Try to connect to health endpoint; -w appends the HTTP status on its own last line
  RESPONSE=$(curl -s --max-time 2 --connect-timeout 1 -w "\n%{http_code}" "$HEALTH_URL" 2>&1)
  CURL_EXIT_CODE=$?
  # Parse response: last line is the status code, everything before it is the body
  HTTP_CODE=$(echo "$RESPONSE" | tail -n 1)
  HEALTH_RESPONSE=$(echo "$RESPONSE" | head -n -1)
  # Check if we got a successful response
  if [ "$HTTP_CODE" = "200" ] && [ -n "$HEALTH_RESPONSE" ]; then
    echo "✓ Health check succeeded on attempt $RETRY_COUNT (${ELAPSED_SEC}s elapsed)"
    break
  fi
  # If this is not the last attempt, wait before retrying
  if [ "$RETRY_COUNT" -lt "$MAX_RETRIES" ]; then
    sleep "$RETRY_DELAY"
  fi
done
echo "=== End Health Check Progress ==="
echo ""
set -e
# Always log the health response for debugging
echo "Final curl exit code: $CURL_EXIT_CODE"
echo "Final HTTP code: $HTTP_CODE"
echo "Total attempts: $RETRY_COUNT"
if [ -n "$HEALTH_RESPONSE" ]; then
  echo "Health response body: $HEALTH_RESPONSE"
else
  echo "Health response body: (empty)"
fi
# Ready means: HTTP 200 with a non-empty body. Anything else dumps diagnostics,
# stops the gateway and aborts.
if [ "$HTTP_CODE" = "200" ] && [ -n "$HEALTH_RESPONSE" ]; then
  echo "Gateway is ready!"
  print_timing "$HEALTH_CHECK_START" "Health check wait"
else
  echo ""
  echo "ERROR: Gateway failed to become ready"
  echo "Last HTTP code: $HTTP_CODE"
  echo "Last health response: ${HEALTH_RESPONSE:-(empty)}"
  echo ""
  echo "Checking if gateway process is still alive..."
  if ps -p "$GATEWAY_PID" > /dev/null 2>&1; then
    echo "Gateway process (PID: $GATEWAY_PID) is still running"
  else
    echo "Gateway process (PID: $GATEWAY_PID) has exited"
    # Reap the gateway in THIS shell. `wait` inside $(...) runs in a subshell where the
    # gateway is not a child, so it would always report 127 instead of the real status.
    # The `||` keeps a non-zero status from tripping `set -e`.
    WAIT_STATUS=0
    wait "$GATEWAY_PID" 2>/dev/null || WAIT_STATUS=$?
    echo "Gateway exit status: $WAIT_STATUS"
  fi
  echo ""
  echo "Docker container status:"
  # Capture first: in "docker ps | head || echo" the pipeline status is head's, so the
  # fallback message could never be reached when docker itself fails.
  if DOCKER_PS_OUTPUT=$(docker ps -a 2>/dev/null); then
    echo "$DOCKER_PS_OUTPUT" | head -20
  else
    echo "Could not list docker containers"
  fi
  echo ""
  echo "Gateway stdout (errors are written here per MCP Gateway Specification):"
  cat /tmp/gh-aw/mcp-config/gateway-output.json 2>/dev/null || echo "No stdout output available"
  echo ""
  echo "Gateway stderr logs (debug output):"
  cat /tmp/gh-aw/mcp-logs/stderr.log 2>/dev/null || echo "No stderr logs available"
  echo ""
  echo "Checking network connectivity to gateway port..."
  # Try netstat first, then ss; report "not listening" only if neither shows the port.
  netstat -tlnp 2>/dev/null | grep ":${MCP_GATEWAY_PORT}" || ss -tlnp 2>/dev/null | grep ":${MCP_GATEWAY_PORT}" || echo "Port ${MCP_GATEWAY_PORT} does not appear to be listening"
  kill "$GATEWAY_PID" 2>/dev/null || true
  exit 1
fi
echo ""
# Collect the rewritten configuration that the gateway writes to its captured stdout.
echo "Reading gateway output configuration..."
OUTPUT_WAIT_START=$(date +%s%3N)
gateway_output_file=/tmp/gh-aw/mcp-config/gateway-output.json
gateway_stderr_file=/tmp/gh-aw/mcp-logs/stderr.log
# Poll up to 10 times, one second apart, until the output file is non-empty.
max_output_polls=10
for (( output_poll = 1; output_poll <= max_output_polls; output_poll++ )); do
  if [ -s "$gateway_output_file" ]; then
    echo "Gateway output received!"
    break
  fi
  # No pause after the final poll
  if (( output_poll < max_output_polls )); then
    sleep 1
  fi
done
print_timing "$OUTPUT_WAIT_START" "Gateway output wait"
echo ""

# Still empty (or missing) after polling: show what we have, stop the gateway, abort.
if ! [ -s "$gateway_output_file" ]; then
  echo "ERROR: Gateway did not write output configuration"
  echo ""
  echo "Gateway stdout (should contain error or config):"
  cat "$gateway_output_file" 2>/dev/null || echo "No stdout output available"
  echo ""
  echo "Gateway stderr logs:"
  cat "$gateway_stderr_file" || echo "No stderr logs available"
  kill "$GATEWAY_PID" 2>/dev/null || true
  exit 1
fi

# The output carries the bearer token / API key, so keep it owner-readable only
chmod 600 "$gateway_output_file"

# A top-level '.error' value means the gateway reported a failure instead of a configuration.
# Per MCP Gateway Specification v1.0.0 section 9.1, errors are written to stdout as error payloads
if jq -e '.error' "$gateway_output_file" >/dev/null 2>&1; then
  echo "ERROR: Gateway returned an error payload instead of configuration"
  echo ""
  echo "Gateway error details:"
  cat "$gateway_output_file"
  echo ""
  echo "Gateway stderr logs:"
  cat "$gateway_stderr_file" || echo "No stderr logs available"
  kill "$GATEWAY_PID" 2>/dev/null || true
  exit 1
fi
# Convert gateway output to agent-specific format
echo "Converting gateway configuration to agent format..."
CONFIG_CONVERT_START=$(date +%s%3N)
# Exported for the converter scripts started below
# (presumably their input path — verify against the converters).
export MCP_GATEWAY_OUTPUT=/tmp/gh-aw/mcp-config/gateway-output.json
# Validate MCP_GATEWAY_API_KEY is set (required by converter scripts).
# The gateway is already running at this point, so stop it before aborting,
# like the other failure paths after launch do.
if [ -z "$MCP_GATEWAY_API_KEY" ]; then
  echo "ERROR: MCP_GATEWAY_API_KEY environment variable must be set for converter scripts"
  echo "This variable should be set in the workflow before calling start_mcp_gateway.sh"
  kill "$GATEWAY_PID" 2>/dev/null || true
  exit 1
fi
# Determine which agent-specific converter to use based on engine type.
# An explicit GH_AW_ENGINE wins; otherwise fall back to engine-specific indicators.
if [ -n "$GH_AW_ENGINE" ]; then
  ENGINE_TYPE="$GH_AW_ENGINE"
# /home/runner/.copilot is used as a directory by the fallback branch below, so probe
# for existence (-e); a regular-file test (-f) never matches a directory.
elif [ -e "/home/runner/.copilot" ] || [ -n "$GITHUB_COPILOT_CLI_MODE" ]; then
  ENGINE_TYPE="copilot"
elif [ -f "/tmp/gh-aw/mcp-config/config.toml" ]; then
  ENGINE_TYPE="codex"
elif [ -f "/tmp/gh-aw/mcp-config/mcp-servers.json" ]; then
  ENGINE_TYPE="claude"
else
  ENGINE_TYPE="unknown"
fi
echo "Detected engine type: $ENGINE_TYPE"
# Map the engine to the display name of its converter; an empty name selects the fallback.
case "$ENGINE_TYPE" in
  copilot)  CONVERTER_LABEL="Copilot" ;;
  codex)    CONVERTER_LABEL="Codex" ;;
  claude)   CONVERTER_LABEL="Claude" ;;
  gemini)   CONVERTER_LABEL="Gemini" ;;
  opencode) CONVERTER_LABEL="OpenCode" ;;
  *)        CONVERTER_LABEL="" ;;
esac
if [ -n "$CONVERTER_LABEL" ]; then
  echo "Using ${CONVERTER_LABEL} converter..."
  # The script path is quoted so a RUNNER_TEMP containing spaces cannot split it.
  # Capture the status instead of letting `set -e` abort, so the gateway can be
  # stopped before exiting with the converter's own status.
  CONVERTER_STATUS=0
  bash "${RUNNER_TEMP}/gh-aw/actions/convert_gateway_config_${ENGINE_TYPE}.sh" || CONVERTER_STATUS=$?
  if [ "$CONVERTER_STATUS" -ne 0 ]; then
    echo "ERROR: ${CONVERTER_LABEL} converter failed with exit status $CONVERTER_STATUS"
    echo "Gateway process will be terminated"
    kill "$GATEWAY_PID" 2>/dev/null || true
    exit "$CONVERTER_STATUS"
  fi
else
  echo "No agent-specific converter found for engine: $ENGINE_TYPE"
  echo "Using gateway output directly"
  # Default fallback - copy to most common location
  mkdir -p /home/runner/.copilot
  cp /tmp/gh-aw/mcp-config/gateway-output.json /home/runner/.copilot/mcp-config.json
  # NOTE(review): this prints the copied gateway output, which carries the bearer
  # token / API key, to the job log — confirm that this is intended.
  cat /home/runner/.copilot/mcp-config.json
fi
print_timing "$CONFIG_CONVERT_START" "Configuration conversion"
echo ""
# Check MCP server functionality
echo "Checking MCP server functionality..."
MCP_CHECK_START=$(date +%s%3N)
# Resolved once and always used quoted, so a RUNNER_TEMP containing spaces or glob
# characters cannot split or expand the path.
MCP_CHECK_SCRIPT="${RUNNER_TEMP}/gh-aw/actions/check_mcp_servers.sh"
if [ -f "$MCP_CHECK_SCRIPT" ]; then
  echo "Running MCP server checks..."
  # Store check diagnostic logs in /tmp/gh-aw/mcp-logs/start-gateway.log for artifact upload
  # Use tee to output to both stdout and the log file
  # Enable pipefail so the exit code comes from check_mcp_servers.sh, not tee
  set -o pipefail
  # Arguments: gateway output file, gateway base URL on the host, API key
  if ! bash "$MCP_CHECK_SCRIPT" \
    /tmp/gh-aw/mcp-config/gateway-output.json \
    "http://localhost:${MCP_GATEWAY_PORT}" \
    "${MCP_GATEWAY_API_KEY}" 2>&1 | tee /tmp/gh-aw/mcp-logs/start-gateway.log; then
    echo "ERROR: MCP server checks failed - no servers could be connected"
    echo "Gateway process will be terminated"
    kill "$GATEWAY_PID" 2>/dev/null || true
    exit 1
  fi
  # Restore the default pipeline semantics for the rest of the script
  set +o pipefail
  print_timing "$MCP_CHECK_START" "MCP server connectivity checks"
else
  echo "WARNING: MCP server check script not found at ${MCP_CHECK_SCRIPT}"
  echo "Skipping MCP server functionality checks"
fi
echo ""
# The raw gateway output is no longer needed once conversion and checks are done; remove it.
echo "Cleaning up gateway configuration file..."
raw_gateway_output=/tmp/gh-aw/mcp-config/gateway-output.json
if [ ! -f "$raw_gateway_output" ]; then
  echo "Gateway configuration file not found (already deleted or never created)"
else
  rm "$raw_gateway_output"
  echo "Gateway configuration file deleted"
fi
echo ""

# Startup summary
echo "MCP gateway is running:"
echo " - From host: http://localhost:${MCP_GATEWAY_PORT}"
echo " - From containers: http://${MCP_GATEWAY_DOMAIN}:${MCP_GATEWAY_PORT}"
echo "Gateway PID: $GATEWAY_PID"
print_timing "$SCRIPT_START_TIME" "Overall gateway startup"
echo ""

# Publish the PID as a GitHub Actions step output for use in cleanup, plus the
# port and API key for use in the stop script (per MCP Gateway Specification v1.1.0).
# One key=value pair per line, appended to the step-output file.
printf '%s\n' \
  "gateway-pid=$GATEWAY_PID" \
  "gateway-port=${MCP_GATEWAY_PORT}" \
  "gateway-api-key=${MCP_GATEWAY_API_KEY}" >> "$GITHUB_OUTPUT"