diff --git a/.env b/.env index 3cffea919a..cd678fd422 100644 --- a/.env +++ b/.env @@ -34,6 +34,8 @@ OTEL_COLLECTOR_HOST=otel-collector OTEL_COLLECTOR_PORT_GRPC=4317 OTEL_COLLECTOR_PORT_HTTP=4318 OTEL_COLLECTOR_CONFIG=./src/otel-collector/otelcol-config.yml +OTEL_COLLECTOR_CONFIG_FULL=./src/otel-collector/otelcol-config-full.yml +OTEL_COLLECTOR_CONFIG_OBSERVABILITY=./src/otel-collector/otelcol-config-observability.yml OTEL_COLLECTOR_CONFIG_EXTRAS=./src/otel-collector/otelcol-config-extras.yml OTEL_EXPORTER_OTLP_ENDPOINT=http://${OTEL_COLLECTOR_HOST}:${OTEL_COLLECTOR_PORT_GRPC} PUBLIC_OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=http://localhost:8080/otlp-http/v1/traces diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 5a41f319c3..cda476f66c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -16,7 +16,7 @@ A Pull Request that modifies instrumentation code will likely require an update in docs. Please make sure to update the opentelemetry.io repo with any docs changes. -A Pull Request that modifies docker-compose.yaml, otelcol-config.yaml, or +A Pull Request that modifies compose*.yaml, otelcol-config*.yml, or Grafana dashboards will likely require an update to the Demo Helm chart. Other changes affecting how a service is deployed will also likely require an update to the Demo Helm chart. 
diff --git a/.github/workflows/label-pr.yml b/.github/workflows/label-pr.yml index a43885f42a..f2893cdce5 100644 --- a/.github/workflows/label-pr.yml +++ b/.github/workflows/label-pr.yml @@ -30,7 +30,7 @@ jobs: - 'src/flagd/**' helmUpdateRequired: - '.env' - - 'docker-compose*.yml' + - 'compose*.yaml' - 'src/flagd/**' - 'src/grafana/**' - 'src/jaeger/**' diff --git a/.gitignore b/.gitignore index 2c7b1982e4..35570a8ea6 100644 --- a/.gitignore +++ b/.gitignore @@ -61,4 +61,3 @@ src/shipping/target/ test/tracetesting/tracetesting-vars.yaml !src/currency/build - diff --git a/CHANGELOG.md b/CHANGELOG.md index fe7d0b3b0c..658efdc620 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,9 @@ the release. * [load-generator] Wait for Roof Binoculars image to load in web tasks, and fix task failures due to missing `tracer` attribute ([#3171](https://github.com/open-telemetry/opentelemetry-demo/pull/3171)) +* [docker] Refactor Docker Compose to use layered `-f` files with `start`, + `start-minimal`, `start-no-o11y`, and `start-minimal-no-o11y` make targets + ([#3229](https://github.com/open-telemetry/opentelemetry-demo/pull/3229)) * [kubernetes] Removed generated Kubernetes manifests in favor of docs ([#3236](https://github.com/open-telemetry/opentelemetry-demo/pull/3236)) * [cart] Swap the deprecated `OpenFeature.Contrib.Providers.Flagd` package diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c5508ec5da..b769863676 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -134,7 +134,7 @@ docker logs - Restart containers if needed: ```sh -docker-compose restart +make restart service=<service-name> ``` ### Review the Documentation diff --git a/Makefile b/Makefile index 285eb3156c..0709316b8f 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,28 @@ ADDLICENSE = $(TOOLS_DIR)/$(ADDLICENSE_BINARY) DOCKER_COMPOSE_CMD ?= docker compose DOCKER_COMPOSE_ENV=--env-file .env --env-file .env.override +DOCKER_COMPOSE_BUILD_ARGS= + +# Compose file layers — combine with -f flags for the desired 
configuration: +# Core (minimal): compose.yaml +# Full (adds Kafka group): compose.yaml + compose.full.yaml +# With observability stack: + compose.observability.yaml +# With extras customizations: + compose.extras.yaml (always last) +DOCKER_COMPOSE_FILES_CORE=-f compose.yaml +DOCKER_COMPOSE_FILES_FULL=$(DOCKER_COMPOSE_FILES_CORE) -f compose.full.yaml +DOCKER_COMPOSE_FILES_OBSERVABILITY=-f compose.observability.yaml +DOCKER_COMPOSE_FILES_EXTRAS=-f compose.extras.yaml + +# Default: full demo + observability stack + extras stub +DOCKER_COMPOSE_FILES=$(DOCKER_COMPOSE_FILES_FULL) $(DOCKER_COMPOSE_FILES_OBSERVABILITY) $(DOCKER_COMPOSE_FILES_EXTRAS) + +# Java Workaround for macOS 15.2+ and M4 chips (see https://bugs.openjdk.org/browse/JDK-8345296) +ifeq ($(shell uname -m),arm64) + ifeq ($(shell uname -s),Darwin) + DOCKER_COMPOSE_ENV+= --env-file .env.arm64 + DOCKER_COMPOSE_BUILD_ARGS+= --build-arg=_JAVA_OPTIONS=-XX:UseSVE=0 + endif +endif # see https://github.com/open-telemetry/build-tools/releases for semconvgen updates # Keep links in semantic_conventions/README.md and .vscode/settings.json in sync! 
@@ -62,6 +84,7 @@ checklicense: $(ADDLICENSE) -ignore node_modules/** \ -ignore .expo/** \ -ignore Pods/** \ + -ignore **/extras/** \ -ignore **/vendor/** \ -ignore **/.venv/** \ -ignore **/dist/** \ @@ -79,6 +102,7 @@ addlicense: $(ADDLICENSE) -ignore node_modules/** \ -ignore .expo/** \ -ignore Pods/** \ + -ignore **/extras/** \ -ignore **/vendor/** \ -ignore **/.venv/** \ -ignore **/dist/** \ @@ -111,11 +135,11 @@ install-tools: $(MISSPELL) $(ADDLICENSE) .PHONY: build build: - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) build + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) build $(DOCKER_COMPOSE_BUILD_ARGS) .PHONY: build-and-push build-and-push: - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) build --push + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) build $(DOCKER_COMPOSE_BUILD_ARGS) --push # Create multiplatform builder for buildx .PHONY: create-multiplatform-builder @@ -153,12 +177,12 @@ clean-images: .PHONY: run-tests run-tests: - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) -f docker-compose-tests.yml run frontendTests - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) -f docker-compose-tests.yml run traceBasedTests + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) -f docker-compose-tests.yml run frontendTests + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) -f docker-compose-tests.yml run traceBasedTests .PHONY: run-tracetesting run-tracetesting: - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) -f docker-compose-tests.yml run traceBasedTests ${SERVICES_TO_TEST} + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) -f docker-compose-tests.yml run traceBasedTests ${SERVICES_TO_TEST} .PHONY: generate-protobuf generate-protobuf: @@ -186,7 +210,7 @@ check-clean-work-tree: .PHONY: start start: - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) up --force-recreate --remove-orphans --detach + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) up --force-recreate 
--remove-orphans --detach @echo "" @echo "OpenTelemetry Demo is running." @echo "Go to http://localhost:8080 for the demo UI." @@ -198,19 +222,37 @@ start: .PHONY: start-minimal start-minimal: - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) -f docker-compose.minimal.yml up --force-recreate --remove-orphans --detach + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES_CORE) $(DOCKER_COMPOSE_FILES_OBSERVABILITY) $(DOCKER_COMPOSE_FILES_EXTRAS) up --force-recreate --remove-orphans --detach @echo "" @echo "OpenTelemetry Demo in minimal mode is running." @echo "Go to http://localhost:8080 for the demo UI." @echo "Go to http://localhost:8080/jaeger/ui for the Jaeger UI." @echo "Go to http://localhost:8080/grafana/ for the Grafana UI." @echo "Go to http://localhost:8080/loadgen/ for the Load Generator UI." - @echo "Go to https://opentelemetry.io/docs/demo/feature-flags/ to learn how to change feature flags." + @echo "Go to http://localhost:8080/feature/ to change feature flags." + +.PHONY: start-no-o11y +start-no-o11y: + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES_FULL) $(DOCKER_COMPOSE_FILES_EXTRAS) up --force-recreate --remove-orphans --detach + @echo "" + @echo "OpenTelemetry Demo is running (no observability stack)." + @echo "Go to http://localhost:8080 for the demo UI." + @echo "Go to http://localhost:8080/loadgen/ for the Load Generator UI." + @echo "Go to http://localhost:8080/feature/ to change feature flags." + +.PHONY: start-minimal-no-o11y +start-minimal-no-o11y: + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES_CORE) $(DOCKER_COMPOSE_FILES_EXTRAS) up --force-recreate --remove-orphans --detach + @echo "" + @echo "OpenTelemetry Demo in minimal mode is running (no observability stack)." + @echo "Go to http://localhost:8080 for the demo UI." + @echo "Go to http://localhost:8080/loadgen/ for the Load Generator UI." + @echo "Go to http://localhost:8080/feature/ to change feature flags." 
.PHONY: stop stop: - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) down --remove-orphans --volumes - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) -f docker-compose-tests.yml down --remove-orphans --volumes + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) down --remove-orphans --volumes + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) -f docker-compose-tests.yml down --remove-orphans --volumes @echo "" @echo "OpenTelemetry Demo is stopped." @@ -224,10 +266,11 @@ ifdef SERVICE endif ifdef service - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) stop $(service) - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) rm --force $(service) - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) create $(service) - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) start $(service) + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) stop $(service) + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) rm --force $(service) + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) create $(service) + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) start $(service) else - @echo "Please provide a service name using `service=[service name]` or `SERVICE=[service name]`" +# Single quotes keep the backticks literal; inside double quotes sh would run them as command substitution. + @echo 'Please provide a service name using `service=[service name]` or `SERVICE=[service name]`' endif @@ -242,11 +285,12 @@ ifdef SERVICE endif ifdef service - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) build $(service) - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) stop $(service) - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) rm --force $(service) - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) create $(service) - $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) start $(service) + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) build $(DOCKER_COMPOSE_BUILD_ARGS) $(service) + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) stop $(service) + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) rm --force $(service) + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) 
create $(service) + $(DOCKER_COMPOSE_CMD) $(DOCKER_COMPOSE_ENV) $(DOCKER_COMPOSE_FILES) start $(service) else - @echo "Please provide a service name using `service=[service name]` or `SERVICE=[service name]`" +# Single quotes keep the backticks literal; inside double quotes sh would run them as command substitution. + @echo 'Please provide a service name using `service=[service name]` or `SERVICE=[service name]`' endif diff --git a/compose.extras.yaml b/compose.extras.yaml new file mode 100644 index 0000000000..06e72acde4 --- /dev/null +++ b/compose.extras.yaml @@ -0,0 +1,26 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +# Extras layer: override this file in your fork to add or modify services +# for your own observability backend or additional components. +# +# This file is intentionally empty. It is always loaded last, giving forks +# a stable, well-defined seam to customize without touching upstream files. +# +# Usage: +# docker compose -f compose.yaml -f compose.full.yaml -f compose.observability.yaml -f compose.extras.yaml up +# +# Example: add a service and patch the otel-collector to export to it: +# +# services: +# my-backend: +# image: myvendor/backend:latest +# ports: +# - "4317" +# +# otel-collector: +# depends_on: +# my-backend: +# condition: service_started +# +# Then add your exporter config to src/otel-collector/otelcol-config-extras.yml. diff --git a/compose.full.yaml b/compose.full.yaml new file mode 100644 index 0000000000..fc72e1ad06 --- /dev/null +++ b/compose.full.yaml @@ -0,0 +1,145 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +# Full demo layer: adds Kafka-dependent services (accounting, fraud-detection) +# and patches core services that need Kafka awareness. 
+# +# Usage: +# docker compose -f compose.yaml -f compose.full.yaml up + +services: + # Accounting service (Kafka consumer) + accounting: + image: ${IMAGE_NAME}:${DEMO_VERSION}-accounting + container_name: accounting + build: + context: ./ + dockerfile: ${ACCOUNTING_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-accounting + deploy: + resources: + limits: + memory: 160M + restart: unless-stopped + environment: + - KAFKA_ADDR + - OTEL_EXPORTER_OTLP_ENDPOINT=http://${OTEL_COLLECTOR_HOST}:${OTEL_COLLECTOR_PORT_HTTP} + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=low + - OTEL_SERVICE_NAME=accounting + - DB_CONNECTION_STRING=Host=${POSTGRES_HOST};Username=astronomy_user;Password=${POSTGRES_ASTRONOMY_PASSWORD};Database=astronomy_db + - OTEL_DOTNET_AUTO_TRACES_ENTITYFRAMEWORKCORE_INSTRUMENTATION_ENABLED=false + depends_on: + otel-collector: + condition: service_started + kafka: + condition: service_healthy + logging: + driver: "json-file" + options: + max-size: "5m" + max-file: "2" + + # Fraud Detection service (Kafka consumer) + fraud-detection: + image: ${IMAGE_NAME}:${DEMO_VERSION}-fraud-detection + container_name: fraud-detection + build: + context: ./ + dockerfile: ${FRAUD_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-fraud-detection + args: + OTEL_JAVA_AGENT_VERSION: ${OTEL_JAVA_AGENT_VERSION} + deploy: + resources: + limits: + memory: 300M + restart: unless-stopped + environment: + - FLAGD_HOST + - FLAGD_PORT + - KAFKA_ADDR + - OTEL_EXPORTER_OTLP_ENDPOINT=http://${OTEL_COLLECTOR_HOST}:${OTEL_COLLECTOR_PORT_HTTP} + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_INSTRUMENTATION_KAFKA_EXPERIMENTAL_SPAN_ATTRIBUTES=true + - OTEL_INSTRUMENTATION_MESSAGING_EXPERIMENTAL_RECEIVE_TELEMETRY_ENABLED=true + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=low + - OTEL_SERVICE_NAME=fraud-detection + # Workaround on OSX for 
https://bugs.openjdk.org/browse/JDK-8345296 + - _JAVA_OPTIONS + depends_on: + otel-collector: + condition: service_started + kafka: + condition: service_healthy + logging: + driver: "json-file" + options: + max-size: "5m" + max-file: "2" + + # Kafka used by Checkout, Accounting, and Fraud Detection services + kafka: + image: ${IMAGE_NAME}:${DEMO_VERSION}-kafka + container_name: kafka + build: + context: ./ + dockerfile: ${KAFKA_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-kafka + args: + OTEL_JAVA_AGENT_VERSION: ${OTEL_JAVA_AGENT_VERSION} + deploy: + resources: + limits: + memory: 620M + restart: unless-stopped + environment: + - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://${KAFKA_HOST}:9092 + - KAFKA_LISTENERS=PLAINTEXT://${KAFKA_HOST}:9092,CONTROLLER://${KAFKA_HOST}:9093 + - KAFKA_CONTROLLER_QUORUM_VOTERS=1@${KAFKA_HOST}:9093 + - OTEL_EXPORTER_OTLP_ENDPOINT=http://${OTEL_COLLECTOR_HOST}:${OTEL_COLLECTOR_PORT_HTTP} + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=low + - OTEL_SERVICE_NAME=kafka + - KAFKA_HEAP_OPTS=-Xmx400m -Xms400m + # Workaround on OSX for https://bugs.openjdk.org/browse/JDK-8345296 + - _JAVA_OPTIONS + healthcheck: + test: nc -z kafka 9092 + start_period: 10s + interval: 5s + timeout: 10s + retries: 10 + logging: + driver: "json-file" + options: + max-size: "5m" + max-file: "2" + + # Patch checkout to add Kafka dependency in full mode + checkout: + environment: + - KAFKA_ADDR + depends_on: + kafka: + condition: service_healthy + + # Patch otel-collector to add Kafka and Postgres metrics receivers + otel-collector: + command: + - "--config=/etc/otelcol-config.yml" + - "--config=/etc/otelcol-config-full.yml" + - "--config=/etc/otelcol-config-extras.yml" + volumes: + - ${OTEL_COLLECTOR_CONFIG}:/etc/otelcol-config.yml + - ${OTEL_COLLECTOR_CONFIG_FULL}:/etc/otelcol-config-full.yml + - ${OTEL_COLLECTOR_CONFIG_EXTRAS}:/etc/otelcol-config-extras.yml + 
environment: + - KAFKA_ADDR + - POSTGRES_HOST + - POSTGRES_PORT + - POSTGRES_PASSWORD + - POSTGRES_MONITORING_PASSWORD diff --git a/compose.observability.yaml b/compose.observability.yaml new file mode 100644 index 0000000000..3da151b210 --- /dev/null +++ b/compose.observability.yaml @@ -0,0 +1,171 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +# Observability stack layer: adds Jaeger, Prometheus, OpenSearch, and Grafana, +# and patches the otel-collector to export to them. +# +# Usage: +# docker compose -f compose.yaml -f compose.observability.yaml up +# docker compose -f compose.yaml -f compose.full.yaml -f compose.observability.yaml up + +services: + # Jaeger + jaeger: + image: ${JAEGERTRACING_IMAGE} + container_name: jaeger + command: + - "--config=file:/etc/jaeger/config.yml" + deploy: + resources: + limits: + memory: 1200M + restart: unless-stopped + ports: + - "${JAEGER_UI_PORT}" + - "${JAEGER_GRPC_PORT}" + environment: + - JAEGER_HOST + - JAEGER_GRPC_PORT + - PROMETHEUS_ADDR + - OTEL_COLLECTOR_HOST + - OTEL_COLLECTOR_PORT_HTTP + - MEMORY_MAX_TRACES=25000 + volumes: + - ./src/jaeger/config.yml:/etc/jaeger/config.yml + logging: + driver: "json-file" + options: + max-size: "5m" + max-file: "2" + + # Grafana + grafana: + image: ${GRAFANA_IMAGE} + container_name: grafana + deploy: + resources: + limits: + memory: 175M + restart: unless-stopped + environment: + - "GF_INSTALL_PLUGINS=grafana-opensearch-datasource" + volumes: + - ./src/grafana/grafana.ini:/etc/grafana/grafana.ini + - ./src/grafana/provisioning/:/etc/grafana/provisioning/ + ports: + - "${GRAFANA_PORT}" + logging: + driver: "json-file" + options: + max-size: "5m" + max-file: "2" + + # Prometheus + prometheus: + image: ${PROMETHEUS_IMAGE} + container_name: prometheus + command: + - --web.console.templates=/etc/prometheus/consoles + - --web.console.libraries=/etc/prometheus/console_libraries + - --storage.tsdb.retention.time=7d + - 
--config.file=/etc/prometheus/prometheus-config.yaml + - --storage.tsdb.path=/prometheus + - --web.enable-lifecycle + - --web.route-prefix=/ + - --web.enable-otlp-receiver + - --enable-feature=exemplar-storage + volumes: + - ./src/prometheus/prometheus-config.yaml:/etc/prometheus/prometheus-config.yaml + deploy: + resources: + limits: + memory: 200M + restart: unless-stopped + ports: + - "${PROMETHEUS_PORT}:${PROMETHEUS_PORT}" + logging: + driver: "json-file" + options: + max-size: "5m" + max-file: "2" + + # OpenSearch + opensearch: + container_name: opensearch + build: + context: ./ + dockerfile: ${OPENSEARCH_DOCKERFILE} + args: + - OPENSEARCH_IMAGE + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-opensearch + deploy: + resources: + limits: + memory: 1G + restart: unless-stopped + environment: + - cluster.name=demo-cluster + - node.name=demo-node + - bootstrap.memory_lock=true + - discovery.type=single-node + - OPENSEARCH_JAVA_OPTS=-Xms400m -Xmx400m + - DISABLE_INSTALL_DEMO_CONFIG=true + - DISABLE_SECURITY_PLUGIN=true + # Workaround on OSX for https://bugs.openjdk.org/browse/JDK-8345296 + - _JAVA_OPTIONS + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + ports: + - "9200" + healthcheck: + test: curl -s http://localhost:9200/_cluster/health | grep -E '"status":"(green|yellow)"' + start_period: 10s + interval: 5s + timeout: 10s + retries: 10 + logging: + driver: "json-file" + options: + max-size: "5m" + max-file: "2" + + # Patch frontend-proxy to wait for observability UIs + frontend-proxy: + depends_on: + jaeger: + condition: service_started + grafana: + condition: service_started + + # Patch otel-collector to add observability exporters and wait for backends. + # Note: otelcol-config-full.yml is included here to maintain correct config ordering + # (core → full → observability → extras). 
When running minimal+observability without + # compose.full.yaml, the Kafka receiver in the full config is harmless — it will simply fail + # to scrape since Kafka is not running (the PostgreSQL database is part of compose.yaml). + otel-collector: + command: + - "--config=/etc/otelcol-config.yml" + - "--config=/etc/otelcol-config-full.yml" + - "--config=/etc/otelcol-config-observability.yml" + - "--config=/etc/otelcol-config-extras.yml" + volumes: + - ${OTEL_COLLECTOR_CONFIG}:/etc/otelcol-config.yml + - ${OTEL_COLLECTOR_CONFIG_FULL}:/etc/otelcol-config-full.yml + - ${OTEL_COLLECTOR_CONFIG_OBSERVABILITY}:/etc/otelcol-config-observability.yml + - ${OTEL_COLLECTOR_CONFIG_EXTRAS}:/etc/otelcol-config-extras.yml + environment: + - KAFKA_ADDR + - POSTGRES_HOST + - POSTGRES_PORT + - POSTGRES_MONITORING_PASSWORD + depends_on: + jaeger: + condition: service_started + opensearch: + condition: service_healthy diff --git a/compose.yaml b/compose.yaml new file mode 100644 index 0000000000..d66bb43945 --- /dev/null +++ b/compose.yaml @@ -0,0 +1,758 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +# Core/minimal demo services. Run alone for the smallest footprint. 
+# Layer additional files on top with -f: +# Full demo (adds Kafka, accounting, fraud-detection): +# docker compose -f compose.yaml -f compose.full.yaml up +# With observability stack (Jaeger, Prometheus, OpenSearch, Grafana): +# docker compose -f compose.yaml -f compose.observability.yaml up +# Default (full + observability): +# docker compose -f compose.yaml -f compose.full.yaml -f compose.observability.yaml up +# With extras (vendor customizations, additional components): +# docker compose -f compose.yaml [-f compose.full.yaml] -f compose.observability.yaml -f compose.extras.yaml up + +x-default-logging: &logging + driver: "json-file" + options: + max-size: "5m" + max-file: "2" + +networks: + default: + name: opentelemetry-demo + driver: bridge + +services: + # ****************** + # Core Demo Services + # ****************** + + # AdService + ad: + image: ${IMAGE_NAME}:${DEMO_VERSION}-ad + container_name: ad + build: + context: ./ + dockerfile: ${AD_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-ad + args: + OTEL_JAVA_AGENT_VERSION: ${OTEL_JAVA_AGENT_VERSION} + deploy: + resources: + limits: + memory: 300M + restart: unless-stopped + ports: + - "${AD_PORT}" + environment: + - AD_PORT + - FLAGD_HOST + - FLAGD_PORT + - OTEL_EXPORTER_OTLP_ENDPOINT=http://${OTEL_COLLECTOR_HOST}:${OTEL_COLLECTOR_PORT_HTTP} + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=medium + - OTEL_LOGS_EXPORTER=otlp + - OTEL_SERVICE_NAME=ad + # Workaround on OSX for https://bugs.openjdk.org/browse/JDK-8345296 + - _JAVA_OPTIONS + depends_on: + otel-collector: + condition: service_started + flagd: + condition: service_started + logging: *logging + + # Cart service + cart: + image: ${IMAGE_NAME}:${DEMO_VERSION}-cart + container_name: cart + build: + context: ./ + dockerfile: ${CART_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-cart + deploy: + resources: + limits: + memory: 160M + 
restart: unless-stopped + ports: + - "${CART_PORT}" + environment: + - CART_PORT + - FLAGD_HOST + - FLAGD_PORT + - VALKEY_ADDR + - OTEL_EXPORTER_OTLP_ENDPOINT + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=high + - OTEL_SERVICE_NAME=cart + - ASPNETCORE_URLS=http://*:${CART_PORT} + depends_on: + valkey-cart: + condition: service_started + otel-collector: + condition: service_started + flagd: + condition: service_started + logging: *logging + + # Checkout service + checkout: + image: ${IMAGE_NAME}:${DEMO_VERSION}-checkout + container_name: checkout + build: + context: ./ + dockerfile: ${CHECKOUT_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-checkout + deploy: + resources: + limits: + memory: 20M + restart: unless-stopped + ports: + - "${CHECKOUT_PORT}" + environment: + - FLAGD_HOST + - FLAGD_PORT + - CHECKOUT_PORT + - CART_ADDR + - CURRENCY_ADDR + - EMAIL_ADDR + - PAYMENT_ADDR + - PRODUCT_CATALOG_ADDR + - SHIPPING_ADDR + - GOMEMLIMIT=16MiB + - OTEL_EXPORTER_OTLP_ENDPOINT + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=critical + - OTEL_SERVICE_NAME=checkout + depends_on: + cart: + condition: service_started + currency: + condition: service_started + email: + condition: service_started + payment: + condition: service_started + product-catalog: + condition: service_started + shipping: + condition: service_started + otel-collector: + condition: service_started + flagd: + condition: service_started + logging: *logging + + # Currency service + currency: + image: ${IMAGE_NAME}:${DEMO_VERSION}-currency + container_name: currency + build: + context: ./ + dockerfile: ${CURRENCY_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-currency + args: + OPENTELEMETRY_CPP_VERSION: ${OPENTELEMETRY_CPP_VERSION} + deploy: + resources: + limits: + memory: 20M + restart: unless-stopped + ports: + - 
"${CURRENCY_PORT}" + environment: + - CURRENCY_PORT + - IPV6_ENABLED + - VERSION=${IMAGE_VERSION} + - OTEL_EXPORTER_OTLP_ENDPOINT + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=high + - OTEL_SERVICE_NAME=currency + depends_on: + otel-collector: + condition: service_started + logging: *logging + + # Email service + email: + image: ${IMAGE_NAME}:${DEMO_VERSION}-email + container_name: email + build: + context: ./ + dockerfile: ${EMAIL_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-email + deploy: + resources: + limits: + memory: 100M + restart: unless-stopped + ports: + - "${EMAIL_PORT}" + environment: + - APP_ENV=production + - EMAIL_PORT + - FLAGD_HOST + - FLAGD_PORT + - OTEL_EXPORTER_OTLP_ENDPOINT=http://${OTEL_COLLECTOR_HOST}:${OTEL_COLLECTOR_PORT_HTTP} + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=medium + - OTEL_SERVICE_NAME=email + depends_on: + otel-collector: + condition: service_started + logging: *logging + + # Frontend + frontend: + image: ${IMAGE_NAME}:${DEMO_VERSION}-frontend + container_name: frontend + build: + context: ./ + dockerfile: ${FRONTEND_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-frontend + deploy: + resources: + limits: + memory: 250M + restart: unless-stopped + ports: + - "${FRONTEND_PORT}" + environment: + - PORT=${FRONTEND_PORT} + - FRONTEND_ADDR + - AD_ADDR + - CART_ADDR + - CHECKOUT_ADDR + - CURRENCY_ADDR + - PRODUCT_CATALOG_ADDR + - PRODUCT_REVIEWS_ADDR + - RECOMMENDATION_ADDR + - SHIPPING_ADDR + - OTEL_EXPORTER_OTLP_ENDPOINT + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=critical + - ENV_PLATFORM + - OTEL_SERVICE_NAME=frontend + - PUBLIC_OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - WEB_OTEL_SERVICE_NAME=frontend-web + - OTEL_COLLECTOR_HOST + - FLAGD_HOST + - 
FLAGD_PORT + depends_on: + ad: + condition: service_started + cart: + condition: service_started + checkout: + condition: service_started + currency: + condition: service_started + product-catalog: + condition: service_started + quote: + condition: service_started + recommendation: + condition: service_started + shipping: + condition: service_started + otel-collector: + condition: service_started + image-provider: + condition: service_started + flagd: + condition: service_started + logging: *logging + + # Frontend Proxy (Envoy) + frontend-proxy: + image: ${IMAGE_NAME}:${DEMO_VERSION}-frontend-proxy + container_name: frontend-proxy + build: + context: ./ + dockerfile: ${FRONTEND_PROXY_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-frontend-proxy + deploy: + resources: + limits: + memory: 65M + restart: unless-stopped + ports: + - "${ENVOY_PORT}:${ENVOY_PORT}" + - "${ENVOY_ADMIN_PORT}:${ENVOY_ADMIN_PORT}" + environment: + - FRONTEND_PORT + - FRONTEND_HOST + - LOCUST_WEB_HOST + - LOCUST_WEB_PORT + - GRAFANA_PORT + - GRAFANA_HOST + - JAEGER_UI_PORT + - JAEGER_HOST + - OTEL_COLLECTOR_HOST + - IMAGE_PROVIDER_HOST + - IMAGE_PROVIDER_PORT + - OTEL_COLLECTOR_PORT_GRPC + - OTEL_COLLECTOR_PORT_HTTP + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=critical + - OTEL_SERVICE_NAME=frontend-proxy + - ENVOY_PORT + - ENVOY_ADDR + - ENVOY_ADMIN_PORT + - FLAGD_HOST + - FLAGD_PORT + - FLAGD_UI_HOST + - FLAGD_UI_PORT + - TELEMETRY_DOCS_HOST + - TELEMETRY_DOCS_PORT + depends_on: + frontend: + condition: service_started + load-generator: + condition: service_started + flagd-ui: + condition: service_started + dns_search: "" + + # image-provider + image-provider: + image: ${IMAGE_NAME}:${DEMO_VERSION}-image-provider + container_name: image-provider + build: + context: ./ + dockerfile: ${IMAGE_PROVIDER_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-image-provider + deploy: + resources: + limits: + memory: 120M + restart: unless-stopped 
+ ports: + - "${IMAGE_PROVIDER_PORT}" + environment: + - IMAGE_PROVIDER_PORT + - OTEL_COLLECTOR_HOST + - OTEL_COLLECTOR_PORT_GRPC + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=low + - OTEL_SERVICE_NAME=image-provider + depends_on: + otel-collector: + condition: service_started + logging: *logging + + # Load Generator + load-generator: + image: ${IMAGE_NAME}:${DEMO_VERSION}-load-generator + container_name: load-generator + build: + context: ./ + dockerfile: ${LOAD_GENERATOR_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-load-generator + deploy: + resources: + limits: + memory: 1500M + restart: unless-stopped + ports: + - "${LOCUST_WEB_PORT}" + environment: + - LOCUST_WEB_PORT + - LOCUST_USERS + - LOCUST_HOST + - LOCUST_HEADLESS + - LOCUST_AUTOSTART + - LOCUST_BROWSER_TRAFFIC_ENABLED=true + - OTEL_EXPORTER_OTLP_ENDPOINT + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=low + - OTEL_SERVICE_NAME=load-generator + - PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + - LOCUST_WEB_HOST=0.0.0.0 + - FLAGD_HOST + - FLAGD_PORT + - FLAGD_OFREP_PORT + depends_on: + frontend: + condition: service_started + flagd: + condition: service_started + logging: *logging + + # Payment service + payment: + image: ${IMAGE_NAME}:${DEMO_VERSION}-payment + container_name: payment + build: + context: ./ + dockerfile: ${PAYMENT_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-payment + deploy: + resources: + limits: + memory: 140M + restart: unless-stopped + ports: + - "${PAYMENT_PORT}" + environment: + - IPV6_ENABLED + - PAYMENT_PORT + - FLAGD_HOST + - FLAGD_PORT + - OTEL_EXPORTER_OTLP_ENDPOINT + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=critical + - OTEL_SERVICE_NAME=payment + depends_on: + otel-collector: + condition: service_started + flagd: + condition: service_started + 
logging: *logging + + # Product Catalog service + product-catalog: + image: ${IMAGE_NAME}:${DEMO_VERSION}-product-catalog + container_name: product-catalog + build: + context: ./ + dockerfile: ${PRODUCT_CATALOG_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-product-catalog + deploy: + resources: + limits: + memory: 20M + restart: unless-stopped + ports: + - "${PRODUCT_CATALOG_PORT}" + environment: + - PRODUCT_CATALOG_PORT + - FLAGD_HOST + - FLAGD_PORT + - GOMEMLIMIT=16MiB + - OTEL_EXPORTER_OTLP_ENDPOINT + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=high + - OTEL_SERVICE_NAME=product-catalog + - OTEL_CONFIG_FILE=/otel-config.yml + - OTEL_SEMCONV_STABILITY_OPT_IN=database + - DB_CONNECTION_STRING=postgres://astronomy_user:${POSTGRES_ASTRONOMY_PASSWORD}@${POSTGRES_HOST}/astronomy_db?sslmode=disable + depends_on: + otel-collector: + condition: service_started + flagd: + condition: service_started + astronomy-db: + condition: service_started + volumes: + - ./otel-config.yml:/otel-config.yml + logging: *logging + + # Product reviews service + product-reviews: + image: ${IMAGE_NAME}:${DEMO_VERSION}-product-reviews + container_name: product-reviews + build: + context: ./ + dockerfile: ${PRODUCT_REVIEWS_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-product-reviews + deploy: + resources: + limits: + memory: 100M + restart: unless-stopped + ports: + - "${PRODUCT_REVIEWS_PORT}" + environment: + - PRODUCT_REVIEWS_PORT + - OTEL_PYTHON_LOG_CORRELATION=true + - OTEL_EXPORTER_OTLP_ENDPOINT + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=medium + - OTEL_SERVICE_NAME=product-reviews + - OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true + - PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + - DB_CONNECTION_STRING=host=${POSTGRES_HOST} user=astronomy_user 
password=${POSTGRES_ASTRONOMY_PASSWORD} dbname=astronomy_db + - LLM_BASE_URL + - OPENAI_API_KEY + - LLM_MODEL + - PRODUCT_CATALOG_ADDR + - FLAGD_HOST + - FLAGD_PORT + - LLM_HOST + - LLM_PORT + depends_on: + product-catalog: + condition: service_started + llm: + condition: service_started + astronomy-db: + condition: service_started + otel-collector: + condition: service_started + logging: *logging + + # Quote service + quote: + image: ${IMAGE_NAME}:${DEMO_VERSION}-quote + container_name: quote + build: + context: ./ + dockerfile: ${QUOTE_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-quote + deploy: + resources: + limits: + memory: 40M + restart: unless-stopped + ports: + - "${QUOTE_PORT}" + environment: + - IPV6_ENABLED + - OTEL_EXPORTER_OTLP_ENDPOINT=http://${OTEL_COLLECTOR_HOST}:${OTEL_COLLECTOR_PORT_HTTP} + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_PHP_AUTOLOAD_ENABLED=true + - QUOTE_PORT + - OTEL_PHP_INTERNAL_METRICS_ENABLED=true + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=low + - OTEL_SERVICE_NAME=quote + depends_on: + otel-collector: + condition: service_started + logging: *logging + + # Recommendation service + recommendation: + image: ${IMAGE_NAME}:${DEMO_VERSION}-recommendation + container_name: recommendation + build: + context: ./ + dockerfile: ${RECOMMENDATION_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-recommendation + deploy: + resources: + limits: + memory: 500M # This is high to enable supporting the recommendationCache feature flag use case + restart: unless-stopped + ports: + - "${RECOMMENDATION_PORT}" + environment: + - RECOMMENDATION_PORT + - PRODUCT_CATALOG_ADDR + - FLAGD_HOST + - FLAGD_PORT + - OTEL_PYTHON_LOG_CORRELATION=true + - OTEL_EXPORTER_OTLP_ENDPOINT + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=medium + - OTEL_SERVICE_NAME=recommendation + - 
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + depends_on: + product-catalog: + condition: service_started + otel-collector: + condition: service_started + flagd: + condition: service_started + logging: *logging + + # Shipping service + shipping: + image: ${IMAGE_NAME}:${DEMO_VERSION}-shipping + container_name: shipping + build: + context: ./ + dockerfile: ${SHIPPING_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-shipping + deploy: + resources: + limits: + memory: 20M + restart: unless-stopped + ports: + - "${SHIPPING_PORT}" + environment: + - IPV6_ENABLED + - SHIPPING_PORT + - QUOTE_ADDR + - OTEL_EXPORTER_OTLP_ENDPOINT + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=high + - OTEL_SERVICE_NAME=shipping + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + depends_on: + otel-collector: + condition: service_started + logging: *logging + + # ****************** + # Dependent Services + # ****************** + + # Flagd, feature flagging service + flagd: + image: ${FLAGD_IMAGE} + container_name: flagd + deploy: + resources: + limits: + memory: 75M + restart: unless-stopped + environment: + - FLAGD_OTEL_COLLECTOR_URI=${OTEL_COLLECTOR_HOST}:${OTEL_COLLECTOR_PORT_GRPC} + - FLAGD_METRICS_EXPORTER=otel + - GOMEMLIMIT=60MiB + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=low + - OTEL_SERVICE_NAME=flagd + command: [ + "start", + "--uri", + "file:./etc/flagd/demo.flagd.json" + ] + ports: + - "${FLAGD_PORT}" + - "${FLAGD_OFREP_PORT}" + volumes: + - ./src/flagd:/etc/flagd + logging: *logging + + # Flagd UI for configuring the feature flag service + flagd-ui: + image: ${IMAGE_NAME}:${DEMO_VERSION}-flagd-ui + container_name: flagd-ui + build: + context: ./ + dockerfile: ${FLAGD_UI_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-flagd-ui + deploy: + resources: + limits: + memory: 200M + restart: always + environment: + - FLAGD_UI_PORT + - 
OTEL_EXPORTER_OTLP_ENDPOINT=http://${OTEL_COLLECTOR_HOST}:${OTEL_COLLECTOR_PORT_HTTP} + - OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE + - OTEL_RESOURCE_ATTRIBUTES=${OTEL_RESOURCE_ATTRIBUTES},service.criticality=low + - OTEL_SERVICE_NAME=flagd-ui + - SECRET_KEY_BASE=yYrECL4qbNwleYInGJYvVnSkwJuSQJ4ijPTx5tirGUXrbznFIBFVJdPl5t6O9ASw + - PHX_HOST=localhost + ports: + - "${FLAGD_UI_PORT}" + depends_on: + otel-collector: + condition: service_started + flagd: + condition: service_started + volumes: + - ./src/flagd:/app/data + + # LLM used by Product Review service + llm: + image: ${IMAGE_NAME}:${DEMO_VERSION}-llm + container_name: llm + build: + context: ./ + dockerfile: ${LLM_DOCKERFILE} + cache_from: + - ${IMAGE_NAME}:${IMAGE_VERSION}-llm + deploy: + resources: + limits: + memory: 50M + restart: unless-stopped + environment: + - FLAGD_HOST + - FLAGD_PORT + ports: + - "${LLM_PORT}" + depends_on: + flagd: + condition: service_started + logging: *logging + + # PostgreSQL database for the astronomy demo + astronomy-db: + image: ${POSTGRES_IMAGE} + container_name: astronomy-db + deploy: + resources: + limits: + memory: 80M + restart: unless-stopped + ports: + - ${POSTGRES_PORT} + command: postgres -c shared_preload_libraries=pg_stat_statements + environment: + - POSTGRES_PASSWORD + volumes: + - ./src/postgresql/init.sql:/docker-entrypoint-initdb.d/init.sql + logging: *logging + + # Valkey used by Cart service + valkey-cart: + image: ${VALKEY_IMAGE} + container_name: valkey-cart + user: valkey + deploy: + resources: + limits: + memory: 20M + restart: unless-stopped + ports: + - "${VALKEY_PORT}" + logging: *logging + + # ******************** + # Telemetry Components + # ******************** + + # OpenTelemetry Collector + # Base configuration: receives and processes telemetry, exports to debug only. + # Add compose.observability.yaml to enable Jaeger, Prometheus, OpenSearch, and Grafana. 
+ otel-collector: + image: ${COLLECTOR_CONTRIB_IMAGE} + container_name: otel-collector + deploy: + resources: + limits: + memory: 200M + restart: unless-stopped + command: + - "--config=/etc/otelcol-config.yml" + - "--config=/etc/otelcol-config-extras.yml" + user: 0:0 + volumes: + - ${HOST_FILESYSTEM}:/hostfs:ro + - ${DOCKER_SOCK}:/var/run/docker.sock:ro + - ${OTEL_COLLECTOR_CONFIG}:/etc/otelcol-config.yml + - ${OTEL_COLLECTOR_CONFIG_EXTRAS}:/etc/otelcol-config-extras.yml + ports: + - "${OTEL_COLLECTOR_PORT_GRPC}" + - "${OTEL_COLLECTOR_PORT_HTTP}" + logging: *logging + environment: + - FRONTEND_PROXY_ADDR + - HOST_FILESYSTEM + - IMAGE_PROVIDER_HOST + - IMAGE_PROVIDER_PORT + - OTEL_COLLECTOR_HOST + - OTEL_COLLECTOR_PORT_GRPC + - OTEL_COLLECTOR_PORT_HTTP + - GOMEMLIMIT=160MiB diff --git a/src/otel-collector/otelcol-config-extras.yml b/src/otel-collector/otelcol-config-extras.yml index 5e863a4295..f241448f20 100644 --- a/src/otel-collector/otelcol-config-extras.yml +++ b/src/otel-collector/otelcol-config-extras.yml @@ -1,18 +1,33 @@ # Copyright The OpenTelemetry Authors # SPDX-License-Identifier: Apache-2.0 -# extra settings to be merged into OpenTelemetry Collector configuration -# do not delete this file - -## Example configuration for sending data to your own OTLP HTTP backend -## Note: the spanmetrics exporter must be included in the exporters array -## if overriding the traces pipeline. -## -# exporters: -# otlp_http/example: -# endpoint: +# Extras/customization layer for the OTel Collector. +# Override this file in your fork to add exporters for your own observability backend. +# +# This file is intentionally empty. It is always loaded last, giving forks +# a stable, well-defined seam to customize without touching upstream files. +# +# IMPORTANT: The OTel Collector merges config files but REPLACES arrays, not appends. 
+# If you want to keep the default observability stack alongside your own exporters, +# you must explicitly repeat the upstream exporter names in your pipeline definitions. +# +# Upstream exporter names (repeat these to keep them active): +# traces: otlp_grpc/jaeger, debug, spanmetrics +# metrics: otlp_http/prometheus, debug +# logs: opensearch, debug +# +# Example: +# +# exporters: +# otlp/mybackend: +# endpoint: https://ingest.mybackend.com # -# service: -# pipelines: -# traces: -# exporters: [spanmetrics, otlp_http/example] +# service: +# pipelines: +# traces: +# exporters: [otlp_grpc/jaeger, debug, spanmetrics, otlp/mybackend] # repeat upstream exporters +# metrics: +# receivers: [docker_stats, httpcheck/frontend-proxy, hostmetrics, nginx, otlp, redis, spanmetrics] +# exporters: [otlp_http/prometheus, debug, otlp/mybackend] # repeat upstream exporters +# logs: +# exporters: [opensearch, debug, otlp/mybackend] # repeat upstream exporters diff --git a/src/otel-collector/otelcol-config-full.yml b/src/otel-collector/otelcol-config-full.yml new file mode 100644 index 0000000000..1196bcfd70 --- /dev/null +++ b/src/otel-collector/otelcol-config-full.yml @@ -0,0 +1,46 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +# Full demo collector additions: Kafka and PostgreSQL metric receivers. +# Loaded when running with compose.full.yaml. +# +# IMPORTANT: The OTel Collector merges config files but REPLACES arrays, not appends. +# The metrics pipeline below must repeat all receivers from otelcol-config.yml. 
+ +receivers: + kafkametrics: + scrapers: + - brokers + - topics + - consumers + brokers: + - ${KAFKA_ADDR} + collection_interval: 10s + postgresql: + endpoint: ${POSTGRES_HOST}:${POSTGRES_PORT} + username: monitoring_user + password: ${env:POSTGRES_MONITORING_PASSWORD} + metrics: + postgresql.blks_hit: + enabled: true + postgresql.blks_read: + enabled: true + postgresql.tup_fetched: + enabled: true + postgresql.tup_returned: + enabled: true + postgresql.tup_inserted: + enabled: true + postgresql.tup_updated: + enabled: true + postgresql.tup_deleted: + enabled: true + postgresql.deadlocks: + enabled: true + tls: + insecure: true + +service: + pipelines: + metrics: + receivers: [docker_stats, httpcheck/frontend-proxy, hostmetrics, nginx, otlp, redis, spanmetrics, kafkametrics, postgresql] diff --git a/src/otel-collector/otelcol-config-observability.yml b/src/otel-collector/otelcol-config-observability.yml new file mode 100644 index 0000000000..dfcab2cd1c --- /dev/null +++ b/src/otel-collector/otelcol-config-observability.yml @@ -0,0 +1,51 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +# Observability stack collector additions: Jaeger, Prometheus, and OpenSearch exporters. +# Loaded when running with compose.observability.yaml. +# +# IMPORTANT: The OTel Collector merges config files but REPLACES arrays, not appends. +# The pipeline exporters below must repeat all exporters from earlier config layers +# (e.g. debug, spanmetrics) to keep them active alongside the observability exporters. +# +# Exporter name reference (these are the stable names forks should reference): +# traces: otlp_grpc/jaeger +# metrics: otlp_http/prometheus +# logs: opensearch +# +# If you are running with compose.full.yaml, also repeat the full receivers in metrics. 
+ +exporters: + otlp_grpc/jaeger: + endpoint: "jaeger:4317" + tls: + insecure: true + sending_queue: + batch: + otlp_http/prometheus: + endpoint: "http://prometheus:9090/api/v1/otlp" + tls: + insecure: true + sending_queue: + batch: + opensearch: + logs_index: otel-logs + logs_index_time_format: "yyyy-MM-dd" + http: + endpoint: "http://opensearch:9200" + tls: + insecure: true + sending_queue: + # Explicitly set due to bug: https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/45016 + num_consumers: 10 + queue_size: 1000 + batch: + +service: + pipelines: + traces: + exporters: [debug, otlp_grpc/jaeger, spanmetrics] + metrics: + exporters: [debug, otlp_http/prometheus] + logs: + exporters: [debug, opensearch] diff --git a/src/otel-collector/otelcol-config.yml b/src/otel-collector/otelcol-config.yml index 4e11bf99a8..8780600c33 100644 --- a/src/otel-collector/otelcol-config.yml +++ b/src/otel-collector/otelcol-config.yml @@ -1,6 +1,13 @@ # Copyright The OpenTelemetry Authors # SPDX-License-Identifier: Apache-2.0 +# Base collector configuration: receivers, processors, and pipelines. +# Exports only to the debug exporter by default. 
+# Layer additional configs on top with --config to add exporters: +# otelcol-config-full.yml adds Kafka and PostgreSQL metric receivers +# otelcol-config-observability.yml adds Jaeger, Prometheus, OpenSearch exporters +# otelcol-config-extras.yml extras/customizations (empty stub) + receivers: otlp: protocols: @@ -22,37 +29,6 @@ receivers: endpoint: unix:///var/run/docker.sock # https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/44511 api_version: "1.44" - kafkametrics: - scrapers: - - brokers - - topics - - consumers - brokers: - - ${KAFKA_ADDR} - collection_interval: 10s - postgresql: - endpoint: ${POSTGRES_HOST}:${POSTGRES_PORT} - username: monitoring_user - password: ${env:POSTGRES_MONITORING_PASSWORD} - metrics: - postgresql.blks_hit: - enabled: true - postgresql.blks_read: - enabled: true - postgresql.tup_fetched: - enabled: true - postgresql.tup_returned: - enabled: true - postgresql.tup_inserted: - enabled: true - postgresql.tup_updated: - enabled: true - postgresql.tup_deleted: - enabled: true - postgresql.deadlocks: - enabled: true - tls: - insecure: true redis: endpoint: "valkey-cart:6379" username: "valkey" @@ -117,7 +93,6 @@ receivers: metrics: system.paging.usage: enabled: true - processes: process: mute_process_exe_error: true @@ -127,32 +102,10 @@ receivers: metrics: system.uptime: enabled: true + exporters: debug: - otlp_grpc/jaeger: - endpoint: "jaeger:4317" - tls: - insecure: true - sending_queue: - batch: - otlp_http/prometheus: - endpoint: "http://prometheus:9090/api/v1/otlp" - tls: - insecure: true - sending_queue: - batch: - opensearch: - logs_index: otel-logs - logs_index_time_format: "yyyy-MM-dd" - http: - endpoint: "http://opensearch:9200" - tls: - insecure: true - sending_queue: - # Explicitly set due to bug: https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/45016 - num_consumers: 10 - queue_size: 1000 - batch: + processors: memory_limiter: check_interval: 5s @@ -213,16 +166,16 @@ service: 
pipelines: traces: receivers: [otlp] - processors: [resourcedetection, memory_limiter, transform/sanitize_spans] - exporters: [otlp_grpc/jaeger, debug, spanmetrics] + processors: [memory_limiter, resourcedetection, transform/sanitize_spans] + exporters: [debug, spanmetrics] metrics: - receivers: [docker_stats, httpcheck/frontend-proxy, hostmetrics, nginx, otlp, postgresql, redis, spanmetrics, kafkametrics] - processors: [resourcedetection, memory_limiter] - exporters: [otlp_http/prometheus, debug] + receivers: [docker_stats, httpcheck/frontend-proxy, hostmetrics, nginx, otlp, redis, spanmetrics] + processors: [memory_limiter, resourcedetection] + exporters: [debug] logs: receivers: [otlp] - processors: [resourcedetection, memory_limiter] - exporters: [opensearch, debug] + processors: [memory_limiter, resourcedetection] + exporters: [debug] telemetry: metrics: readers: