-
Notifications
You must be signed in to change notification settings - Fork 0
Prometheus + JSON logging for grafana #82
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
eb04f33
3964670
838781f
3e28628
8fecfb8
7ad1a96
3170e80
b1a5026
a8ceda9
934c3a0
a309f8f
bc6d8ce
c447783
9acf22e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
defmodule EthuiWeb.MetricsController do
  @moduledoc """
  Exposes Prometheus metrics for scraping.

  This endpoint is designed to be accessible only within the internal Docker/Dokploy network,
  not via external Traefik routing. Prometheus should scrape metrics directly from the
  application on port 4000.
  """

  use EthuiWeb, :controller

  @doc """
  Renders the current Prometheus metrics.

  Scrapes the `EthuiWeb.Telemetry.Prometheus` core reporter and returns the
  result as a plain-text 200 response.
  """
  def index(conn, _params) do
    # NOTE(review): the Prometheus exposition format is conventionally served
    # as "text/plain; version=0.0.4"; bare "text/plain" works for most scrapers.
    metrics = TelemetryMetricsPrometheus.Core.scrape(EthuiWeb.Telemetry.Prometheus)

    conn
    |> put_resp_content_type("text/plain")
    |> send_resp(200, metrics)
  end
end
|
naps62 marked this conversation as resolved.
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
defmodule EthuiWeb.Plugs.LogMetadata do
  @moduledoc """
  Adds request-specific metadata to logs for structured JSON logging.

  This plug enriches logs with contextual information including:
  - Request ID, remote IP, HTTP method, and path
  - User ID (if authenticated)
  - Stack slug (if available from subdomain routing)
  - Response status and duration (added before sending response)
  """

  import Plug.Conn
  require Logger

  @doc "Plug callback: no options are used; they are returned unchanged."
  def init(opts), do: opts

  @doc """
  Attaches request metadata to the Logger context and registers a
  `before_send` hook that records the response status and duration.
  """
  def call(conn, _opts) do
    Logger.metadata(
      request_id: get_request_id(conn),
      remote_ip: format_ip(conn.remote_ip),
      method: conn.method,
      path: conn.request_path
    )

    # Only set when an authenticated user is present in assigns.
    case conn.assigns[:current_user] do
      %{id: user_id} -> Logger.metadata(user_id: user_id)
      _ -> :ok
    end

    # Only set when subdomain routing has resolved a stack.
    case conn.assigns[:stack] do
      %{slug: slug} -> Logger.metadata(stack_slug: slug)
      _ -> :ok
    end

    register_before_send(conn, fn conn ->
      # Response metadata is only known once the response is about to be sent.
      Logger.metadata(
        status: conn.status,
        duration: calculate_duration(conn)
      )

      conn
    end)
  end

  # Prefer the x-request-id response header (as set by Plug.RequestId);
  # fall back to any request_id already present in the Logger metadata.
  defp get_request_id(conn) do
    case get_resp_header(conn, "x-request-id") do
      [request_id] -> request_id
      _ -> Logger.metadata()[:request_id]
    end
  end

  # Formats both IPv4 (4-tuple) and IPv6 (8-tuple) addresses via :inet.ntoa/1,
  # e.g. {127, 0, 0, 1} -> "127.0.0.1" and {0, 0, 0, 0, 0, 0, 0, 1} -> "::1".
  # IPv4 output is identical to the previous hand-rolled interpolation.
  defp format_ip(ip) when is_tuple(ip) do
    case :inet.ntoa(ip) do
      {:error, _} -> inspect(ip)
      formatted -> to_string(formatted)
    end
  end

  # Fallback for anything that is not an address tuple.
  defp format_ip(ip), do: inspect(ip)

  # Microseconds elapsed since the endpoint's telemetry start time, or nil
  # when that start time is unavailable.
  # NOTE(review): assumes conn.private[:phoenix_endpoint_start] holds a
  # monotonic start time under the :system key — confirm against the
  # Phoenix version in use.
  defp calculate_duration(conn) do
    case conn.private[:phoenix_endpoint_start] do
      %{system: start} ->
        System.monotonic_time()
        |> Kernel.-(start)
        |> System.convert_time_unit(:native, :microsecond)

      _ ->
        nil
    end
  end
end
|
naps62 marked this conversation as resolved.
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,9 +11,9 @@ defmodule EthuiWeb.Telemetry do | |
| children = [ | ||
| # Telemetry poller will execute the given period measurements | ||
| # every 10_000ms. Learn more here: https://hexdocs.pm/telemetry_metrics | ||
| {:telemetry_poller, measurements: periodic_measurements(), period: 10_000} | ||
| # Add reporters as children of your supervision tree. | ||
| # {Telemetry.Metrics.ConsoleReporter, metrics: metrics()} | ||
| {:telemetry_poller, measurements: periodic_measurements(), period: 10_000}, | ||
| # Prometheus metrics reporter | ||
| {TelemetryMetricsPrometheus.Core, metrics: metrics(), name: __MODULE__.Prometheus} | ||
| ] | ||
|
|
||
| Supervisor.init(children, strategy: :one_for_one) | ||
|
|
@@ -22,64 +22,110 @@ defmodule EthuiWeb.Telemetry do | |
| def metrics do | ||
| [ | ||
| # Phoenix Metrics | ||
| summary("phoenix.endpoint.start.system_time", | ||
| unit: {:native, :millisecond} | ||
| ), | ||
| summary("phoenix.endpoint.stop.duration", | ||
| unit: {:native, :millisecond} | ||
| distribution("phoenix.endpoint.stop.duration", | ||
| unit: {:native, :millisecond}, | ||
| description: "Phoenix endpoint response time", | ||
| reporter_options: [buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000, 10000]] | ||
| ), | ||
| summary("phoenix.router_dispatch.start.system_time", | ||
| distribution("phoenix.router_dispatch.stop.duration", | ||
| tags: [:route], | ||
| unit: {:native, :millisecond} | ||
| unit: {:native, :millisecond}, | ||
| description: "Phoenix router dispatch time by route", | ||
| reporter_options: [buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000, 10000]] | ||
| ), | ||
| summary("phoenix.router_dispatch.exception.duration", | ||
| distribution("phoenix.router_dispatch.exception.duration", | ||
| tags: [:route], | ||
| unit: {:native, :millisecond} | ||
| unit: {:native, :millisecond}, | ||
| description: "Phoenix router exception duration", | ||
| reporter_options: [buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000, 10000]] | ||
| ), | ||
| summary("phoenix.router_dispatch.stop.duration", | ||
| tags: [:route], | ||
| unit: {:native, :millisecond} | ||
| distribution("phoenix.socket_connected.duration", | ||
| unit: {:native, :millisecond}, | ||
| description: "WebSocket connection time", | ||
| reporter_options: [buckets: [100, 250, 500, 1000, 2500, 5000]] | ||
| ), | ||
| summary("phoenix.socket_connected.duration", | ||
| unit: {:native, :millisecond} | ||
| sum("phoenix.socket_drain.count", | ||
| description: "WebSocket drain count" | ||
| ), | ||
| sum("phoenix.socket_drain.count"), | ||
| summary("phoenix.channel_joined.duration", | ||
| unit: {:native, :millisecond} | ||
| distribution("phoenix.channel_joined.duration", | ||
| unit: {:native, :millisecond}, | ||
| description: "Channel join duration", | ||
| reporter_options: [buckets: [100, 250, 500, 1000, 2500, 5000]] | ||
| ), | ||
| summary("phoenix.channel_handled_in.duration", | ||
| distribution("phoenix.channel_handled_in.duration", | ||
| tags: [:event], | ||
| unit: {:native, :millisecond} | ||
| unit: {:native, :millisecond}, | ||
| description: "Channel message handling duration", | ||
| reporter_options: [buckets: [10, 50, 100, 250, 500, 1000]] | ||
| ), | ||
|
|
||
| # Database Metrics | ||
| summary("ethui.repo.query.total_time", | ||
| distribution("ethui.repo.query.total_time", | ||
| unit: {:native, :millisecond}, | ||
| description: "The sum of the other measurements" | ||
| description: "Total database query time", | ||
| reporter_options: [buckets: [1, 5, 10, 25, 50, 100, 250, 500, 1000]] | ||
| ), | ||
| summary("ethui.repo.query.decode_time", | ||
| distribution("ethui.repo.query.decode_time", | ||
| unit: {:native, :millisecond}, | ||
| description: "The time spent decoding the data received from the database" | ||
| description: "Time spent decoding database results", | ||
| reporter_options: [buckets: [1, 5, 10, 25, 50, 100, 250]] | ||
| ), | ||
| summary("ethui.repo.query.query_time", | ||
| distribution("ethui.repo.query.query_time", | ||
| unit: {:native, :millisecond}, | ||
| description: "The time spent executing the query" | ||
| description: "Time spent executing database query", | ||
| reporter_options: [buckets: [1, 5, 10, 25, 50, 100, 250, 500, 1000]] | ||
| ), | ||
| summary("ethui.repo.query.queue_time", | ||
| distribution("ethui.repo.query.queue_time", | ||
| unit: {:native, :millisecond}, | ||
| description: "The time spent waiting for a database connection" | ||
| description: "Time spent waiting for database connection", | ||
| reporter_options: [buckets: [1, 5, 10, 25, 50, 100, 250]] | ||
| ), | ||
| summary("ethui.repo.query.idle_time", | ||
| distribution("ethui.repo.query.idle_time", | ||
| unit: {:native, :millisecond}, | ||
| description: | ||
| "The time the connection spent waiting before being checked out for the query" | ||
| description: "Database connection idle time before query", | ||
| reporter_options: [buckets: [10, 50, 100, 250, 500, 1000]] | ||
| ), | ||
|
naps62 marked this conversation as resolved.
|
||
|
|
||
| # VM Metrics | ||
| summary("vm.memory.total", unit: {:byte, :kilobyte}), | ||
| summary("vm.total_run_queue_lengths.total"), | ||
| summary("vm.total_run_queue_lengths.cpu"), | ||
| summary("vm.total_run_queue_lengths.io") | ||
| last_value("vm.memory.total", | ||
| unit: {:byte, :kilobyte}, | ||
| description: "Total VM memory usage" | ||
| ), | ||
| last_value("vm.total_run_queue_lengths.total", | ||
| description: "Total run queue length" | ||
| ), | ||
| last_value("vm.total_run_queue_lengths.cpu", | ||
| description: "CPU run queue length" | ||
| ), | ||
| last_value("vm.total_run_queue_lengths.io", | ||
| description: "IO run queue length" | ||
| ), | ||
|
|
||
| # Application Metrics | ||
| counter("ethui.stacks.created.count", | ||
| description: "Total number of stacks created" | ||
| ), | ||
| counter("ethui.stacks.deleted.count", | ||
| description: "Total number of stacks deleted" | ||
| ), | ||
| last_value("ethui.stacks.active.count", | ||
| description: "Current number of active stacks" | ||
| ), | ||
|
||
| counter("ethui.api.requests.count", | ||
| tags: [:method, :path, :status], | ||
| description: "API request count by method, path, and status" | ||
| ), | ||
|
naps62 marked this conversation as resolved.
Outdated
|
||
| counter("ethui.auth.code_sent.count", | ||
| description: "Number of authentication codes sent" | ||
| ), | ||
| counter("ethui.auth.code_verified.count", | ||
| tags: [:status], | ||
| description: "Number of authentication verification attempts" | ||
| ), | ||
| counter("ethui.errors.count", | ||
|
naps62 marked this conversation as resolved.
Outdated
|
||
| tags: [:type], | ||
| description: "Application errors by type" | ||
| ) | ||
|
naps62 marked this conversation as resolved.
Outdated
|
||
| ] | ||
| end | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using `metadata: :all` in the logger configuration may capture excessive metadata and could lead to performance issues or large log sizes, especially in production. This setting captures all metadata fields from the Logger context, including internal Erlang/OTP fields and any custom metadata added throughout the application.

Consider explicitly listing the metadata fields you want to capture instead of using `:all`. The `:console` configuration already has an explicit list of metadata fields (lines 92-107), which is the recommended approach. Apply the same pattern to the `:default_handler` configuration.