diff --git a/CHANGELOG.md b/CHANGELOG.md
index 21835a5c3..956765c6a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@ The format is based on [keep a changelog](http://keepachangelog.com) and this pr
 
 ## [Unreleased]
 
+### Fixed
+- Ignore negative custom runtime counter deltas to avoid panics during metrics collection.
+
 ## [3.38.0] - 2026-03-20
 ### Added
 - Add runtime Satori client feature to delete identities.
diff --git a/server/metrics.go b/server/metrics.go
index 9fbfee516..d2c0bbe1f 100644
--- a/server/metrics.go
+++ b/server/metrics.go
@@ -92,10 +92,11 @@ type LocalMetrics struct {
 	currentRecvBytes *atomic.Int64
 	currentSentBytes *atomic.Int64
 
-	PrometheusScope       tally.Scope
-	prometheusCustomScope tally.Scope
-	prometheusCloser      io.Closer
-	prometheusHTTPServer  *http.Server
+	PrometheusScope        tally.Scope
+	prometheusCustomScope  tally.Scope
+	metricsCollectionScope tally.Scope
+	prometheusCloser       io.Closer
+	prometheusHTTPServer   *http.Server
 }
 
 func NewLocalMetrics(logger, startupLogger *zap.Logger, db *sql.DB, config Config) *LocalMetrics {
@@ -162,8 +163,10 @@ func NewLocalMetrics(logger, startupLogger *zap.Logger, db *sql.DB, config Confi
 		SanitizeOptions: &prometheus.DefaultSanitizerOpts,
 	}, time.Duration(config.GetMetrics().ReportingFreqSec)*time.Second)
 	m.prometheusCustomScope = m.PrometheusScope.SubScope(config.GetMetrics().CustomPrefix)
+	m.metricsCollectionScope = m.PrometheusScope
 	if config.GetMetrics().CustomScopeLimit > 0 {
 		m.prometheusCustomScope = newMetricsLimitedScope(m.prometheusCustomScope, int64(config.GetMetrics().CustomScopeLimit))
+		m.metricsCollectionScope = newMetricsLimitedScope(m.metricsCollectionScope, int64(config.GetMetrics().CustomScopeLimit))
 	}
 
 	// Check if exposing Prometheus metrics directly is enabled.
@@ -523,6 +526,11 @@ func (m *LocalMetrics) StorageWriteRejectCount(tags map[string]string, delta int
 
 // CustomCounter adds the given delta to a counter with the specified name and tags.
 func (m *LocalMetrics) CustomCounter(name string, tags map[string]string, delta int64) {
+	if delta < 0 {
+		m.metricsCollectionScope.Tagged(map[string]string{"name": name, "err": "negative_increment"}).Counter("metrics_collection_errors_count").Inc(1)
+		return
+	}
+
 	scope := m.prometheusCustomScope
 	if len(tags) != 0 {
 		scope = scope.Tagged(tags)
diff --git a/server/metrics_test.go b/server/metrics_test.go
new file mode 100644
index 000000000..215c77840
--- /dev/null
+++ b/server/metrics_test.go
@@ -0,0 +1,41 @@
+// Copyright 2026 The Nakama Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package server
+
+import (
+	"testing"
+	"time"
+
+	"go.uber.org/zap"
+)
+
+func TestMetricsCounterAddNegativeDoesNotPanic(t *testing.T) {
+	logger := zap.NewNop()
+	cfg := NewConfig(logger)
+	cfg.Metrics.ReportingFreqSec = 1
+	reportingInterval := time.Duration(cfg.Metrics.ReportingFreqSec) * time.Second
+	flushWait := reportingInterval + 200*time.Millisecond
+
+	metrics := NewLocalMetrics(logger, logger, nil, cfg)
+	defer metrics.Stop(logger)
+
+	module := &RuntimeGoNakamaModule{metrics: metrics}
+	module.MetricsCounterAdd("panic_counter", nil, 1)
+
+	time.Sleep(flushWait)
+	module.MetricsCounterAdd("panic_counter", nil, -1)
+
+	time.Sleep(flushWait)
+}