-
Notifications
You must be signed in to change notification settings - Fork 0
Prometheus + JSON logging for grafana #82
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
eb04f33
3964670
838781f
3e28628
8fecfb8
7ad1a96
3170e80
b1a5026
a8ceda9
934c3a0
a309f8f
bc6d8ce
c447783
9acf22e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
defmodule EthuiWeb.MetricsController do
  @moduledoc """
  Exposes Prometheus metrics for scraping.

  This endpoint is designed to be accessible only within the internal Docker/Dokploy network,
  not via external Traefik routing. Prometheus should scrape metrics directly from the
  application on port 4000.
  """

  use EthuiWeb, :controller

  @doc """
  Renders the current Prometheus metrics.

  Scrapes the `EthuiWeb.Telemetry.Prometheus` core reporter and returns the
  result as a plain-text 200 response.
  """
  def index(conn, _params) do
    # NOTE(review): the Prometheus exposition format is conventionally served
    # as "text/plain; version=0.0.4"; bare "text/plain" works for most scrapers.
    metrics = TelemetryMetricsPrometheus.Core.scrape(EthuiWeb.Telemetry.Prometheus)

    conn
    |> put_resp_content_type("text/plain")
    |> send_resp(200, metrics)
  end
end
|
naps62 marked this conversation as resolved.
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
defmodule EthuiWeb.Plugs.LogMetadata do
  @moduledoc """
  Adds request-specific metadata to logs for structured JSON logging.

  This plug enriches logs with contextual information including:
  - Request ID, remote IP, HTTP method, and path
  - User ID (if authenticated)
  - Stack slug (if available from subdomain routing)
  - Response status and duration (added before sending response)
  """

  import Plug.Conn
  require Logger

  @doc "Plug callback: no options are used; they are returned unchanged."
  def init(opts), do: opts

  @doc """
  Attaches request metadata to the Logger context and registers a
  `before_send` hook that records the response status and duration.
  """
  def call(conn, _opts) do
    Logger.metadata(
      request_id: get_request_id(conn),
      remote_ip: format_ip(conn.remote_ip),
      method: conn.method,
      path: conn.request_path
    )

    # Only set when an authenticated user is present in assigns.
    case conn.assigns[:current_user] do
      %{id: user_id} -> Logger.metadata(user_id: user_id)
      _ -> :ok
    end

    # Only set when subdomain routing has resolved a stack.
    case conn.assigns[:stack] do
      %{slug: slug} -> Logger.metadata(stack_slug: slug)
      _ -> :ok
    end

    register_before_send(conn, fn conn ->
      # Response metadata is only known once the response is about to be sent.
      Logger.metadata(
        status: conn.status,
        duration: calculate_duration(conn)
      )

      conn
    end)
  end

  # Prefer the x-request-id response header (as set by Plug.RequestId);
  # fall back to any request_id already present in the Logger metadata.
  defp get_request_id(conn) do
    case get_resp_header(conn, "x-request-id") do
      [request_id] -> request_id
      _ -> Logger.metadata()[:request_id]
    end
  end

  # Formats both IPv4 (4-tuple) and IPv6 (8-tuple) addresses via :inet.ntoa/1,
  # e.g. {127, 0, 0, 1} -> "127.0.0.1" and {0, 0, 0, 0, 0, 0, 0, 1} -> "::1".
  # IPv4 output is identical to the previous hand-rolled interpolation.
  defp format_ip(ip) when is_tuple(ip) do
    case :inet.ntoa(ip) do
      {:error, _} -> inspect(ip)
      formatted -> to_string(formatted)
    end
  end

  # Fallback for anything that is not an address tuple.
  defp format_ip(ip), do: inspect(ip)

  # Microseconds elapsed since the endpoint's telemetry start time, or nil
  # when that start time is unavailable.
  # NOTE(review): assumes conn.private[:phoenix_endpoint_start] holds a
  # monotonic start time under the :system key — confirm against the
  # Phoenix version in use.
  defp calculate_duration(conn) do
    case conn.private[:phoenix_endpoint_start] do
      %{system: start} ->
        System.monotonic_time()
        |> Kernel.-(start)
        |> System.convert_time_unit(:native, :microsecond)

      _ ->
        nil
    end
  end
end
|
naps62 marked this conversation as resolved.
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,9 +11,9 @@ defmodule EthuiWeb.Telemetry do | |
| children = [ | ||
| # Telemetry poller will execute the given period measurements | ||
| # every 10_000ms. Learn more here: https://hexdocs.pm/telemetry_metrics | ||
| {:telemetry_poller, measurements: periodic_measurements(), period: 10_000} | ||
| # Add reporters as children of your supervision tree. | ||
| # {Telemetry.Metrics.ConsoleReporter, metrics: metrics()} | ||
| {:telemetry_poller, measurements: periodic_measurements(), period: 10_000}, | ||
| # Prometheus metrics reporter | ||
| {TelemetryMetricsPrometheus.Core, metrics: metrics(), name: __MODULE__.Prometheus} | ||
| ] | ||
|
|
||
| Supervisor.init(children, strategy: :one_for_one) | ||
|
|
@@ -22,64 +22,110 @@ defmodule EthuiWeb.Telemetry do | |
| def metrics do | ||
| [ | ||
| # Phoenix Metrics | ||
| summary("phoenix.endpoint.start.system_time", | ||
| unit: {:native, :millisecond} | ||
| ), | ||
| summary("phoenix.endpoint.stop.duration", | ||
| unit: {:native, :millisecond} | ||
| distribution("phoenix.endpoint.stop.duration", | ||
| unit: {:native, :millisecond}, | ||
| description: "Phoenix endpoint response time", | ||
| reporter_options: [buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000, 10000]] | ||
| ), | ||
| summary("phoenix.router_dispatch.start.system_time", | ||
| distribution("phoenix.router_dispatch.stop.duration", | ||
| tags: [:route], | ||
| unit: {:native, :millisecond} | ||
| unit: {:native, :millisecond}, | ||
| description: "Phoenix router dispatch time by route", | ||
| reporter_options: [buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000, 10000]] | ||
| ), | ||
| summary("phoenix.router_dispatch.exception.duration", | ||
| distribution("phoenix.router_dispatch.exception.duration", | ||
| tags: [:route], | ||
| unit: {:native, :millisecond} | ||
| unit: {:native, :millisecond}, | ||
| description: "Phoenix router exception duration", | ||
| reporter_options: [buckets: [10, 50, 100, 250, 500, 1000, 2500, 5000, 10000]] | ||
| ), | ||
| summary("phoenix.router_dispatch.stop.duration", | ||
| tags: [:route], | ||
| unit: {:native, :millisecond} | ||
| distribution("phoenix.socket_connected.duration", | ||
| unit: {:native, :millisecond}, | ||
| description: "WebSocket connection time", | ||
| reporter_options: [buckets: [100, 250, 500, 1000, 2500, 5000]] | ||
| ), | ||
| summary("phoenix.socket_connected.duration", | ||
| unit: {:native, :millisecond} | ||
| sum("phoenix.socket_drain.count", | ||
| description: "WebSocket drain count" | ||
| ), | ||
| sum("phoenix.socket_drain.count"), | ||
| summary("phoenix.channel_joined.duration", | ||
| unit: {:native, :millisecond} | ||
| distribution("phoenix.channel_joined.duration", | ||
| unit: {:native, :millisecond}, | ||
| description: "Channel join duration", | ||
| reporter_options: [buckets: [100, 250, 500, 1000, 2500, 5000]] | ||
| ), | ||
| summary("phoenix.channel_handled_in.duration", | ||
| distribution("phoenix.channel_handled_in.duration", | ||
| tags: [:event], | ||
| unit: {:native, :millisecond} | ||
| unit: {:native, :millisecond}, | ||
| description: "Channel message handling duration", | ||
| reporter_options: [buckets: [10, 50, 100, 250, 500, 1000]] | ||
| ), | ||
|
|
||
| # Database Metrics | ||
| summary("ethui.repo.query.total_time", | ||
| distribution("ethui.repo.query.total_time", | ||
| unit: {:native, :millisecond}, | ||
| description: "The sum of the other measurements" | ||
| description: "Total database query time", | ||
| reporter_options: [buckets: [1, 5, 10, 25, 50, 100, 250, 500, 1000]] | ||
| ), | ||
| summary("ethui.repo.query.decode_time", | ||
| distribution("ethui.repo.query.decode_time", | ||
| unit: {:native, :millisecond}, | ||
| description: "The time spent decoding the data received from the database" | ||
| description: "Time spent decoding database results", | ||
| reporter_options: [buckets: [1, 5, 10, 25, 50, 100, 250]] | ||
| ), | ||
| summary("ethui.repo.query.query_time", | ||
| distribution("ethui.repo.query.query_time", | ||
| unit: {:native, :millisecond}, | ||
| description: "The time spent executing the query" | ||
| description: "Time spent executing database query", | ||
| reporter_options: [buckets: [1, 5, 10, 25, 50, 100, 250, 500, 1000]] | ||
| ), | ||
| summary("ethui.repo.query.queue_time", | ||
| distribution("ethui.repo.query.queue_time", | ||
| unit: {:native, :millisecond}, | ||
| description: "The time spent waiting for a database connection" | ||
| description: "Time spent waiting for database connection", | ||
| reporter_options: [buckets: [1, 5, 10, 25, 50, 100, 250]] | ||
| ), | ||
| summary("ethui.repo.query.idle_time", | ||
| distribution("ethui.repo.query.idle_time", | ||
| unit: {:native, :millisecond}, | ||
| description: | ||
| "The time the connection spent waiting before being checked out for the query" | ||
| description: "Database connection idle time before query", | ||
| reporter_options: [buckets: [10, 50, 100, 250, 500, 1000]] | ||
| ), | ||
|
naps62 marked this conversation as resolved.
|
||
|
|
||
| # VM Metrics | ||
| summary("vm.memory.total", unit: {:byte, :kilobyte}), | ||
| summary("vm.total_run_queue_lengths.total"), | ||
| summary("vm.total_run_queue_lengths.cpu"), | ||
| summary("vm.total_run_queue_lengths.io") | ||
| last_value("vm.memory.total", | ||
| unit: {:byte, :kilobyte}, | ||
| description: "Total VM memory usage" | ||
| ), | ||
| last_value("vm.total_run_queue_lengths.total", | ||
| description: "Total run queue length" | ||
| ), | ||
| last_value("vm.total_run_queue_lengths.cpu", | ||
| description: "CPU run queue length" | ||
| ), | ||
| last_value("vm.total_run_queue_lengths.io", | ||
| description: "IO run queue length" | ||
| ), | ||
|
|
||
| # Application Metrics | ||
| counter("ethui.stacks.created.count", | ||
| description: "Total number of stacks created" | ||
| ), | ||
| counter("ethui.stacks.deleted.count", | ||
| description: "Total number of stacks deleted" | ||
| ), | ||
| last_value("ethui.stacks.active.count", | ||
| description: "Current number of active stacks" | ||
| ), | ||
|
||
| counter("ethui.api.requests.count", | ||
| tags: [:method, :path, :status], | ||
| description: "API request count by method, path, and status" | ||
| ), | ||
|
naps62 marked this conversation as resolved.
Outdated
|
||
| counter("ethui.auth.code_sent.count", | ||
| description: "Number of authentication codes sent" | ||
| ), | ||
| counter("ethui.auth.code_verified.count", | ||
| tags: [:status], | ||
| description: "Number of authentication verification attempts" | ||
| ), | ||
| counter("ethui.errors.count", | ||
|
naps62 marked this conversation as resolved.
Outdated
|
||
| tags: [:type], | ||
| description: "Application errors by type" | ||
| ) | ||
|
naps62 marked this conversation as resolved.
Outdated
|
||
| ] | ||
| end | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using `metadata: :all` in the logger configuration may capture excessive metadata and could lead to performance issues or large log sizes, especially in production. This setting captures all metadata fields from the Logger context, including internal Erlang/OTP fields and any custom metadata added throughout the application.

Consider explicitly listing the metadata fields you want to capture instead of using `:all`. The `:console` configuration already has an explicit list of metadata fields (lines 92-107), which is the recommended approach. Apply the same pattern to the `:default_handler` configuration.