diff --git a/go.mod b/go.mod
index 75bb3afbda..d0a394000d 100644
--- a/go.mod
+++ b/go.mod
@@ -31,6 +31,8 @@ require (
 	sigs.k8s.io/yaml v1.6.0
 )
 
+require github.com/stretchr/testify v1.11.1
+
 require (
 	cel.dev/expr v0.25.1 // indirect
 	github.com/Masterminds/semver/v3 v3.4.0 // indirect
diff --git a/pkg/metrics/client_go_adapter.go b/pkg/metrics/client_go_adapter.go
index ff28998c44..2d5df63096 100644
--- a/pkg/metrics/client_go_adapter.go
+++ b/pkg/metrics/client_go_adapter.go
@@ -18,6 +18,10 @@ package metrics
 
 import (
 	"context"
+	"net/url"
+	"sync"
+	"sync/atomic"
+	"time"
 
 	"github.com/prometheus/client_golang/prometheus"
 	clientmetrics "k8s.io/client-go/tools/metrics"
@@ -37,8 +41,68 @@ var (
 		},
 		[]string{"code", "method", "host"},
 	)
+
+	requestLatency = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name:    "rest_client_request_duration_seconds",
+			Help:    "Request latency in seconds. Broken down by verb and host.",
+			Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0},
+		},
+		[]string{"verb", "host"},
+	)
+
+	resolverLatency = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name:    "rest_client_dns_resolution_duration_seconds",
+			Help:    "DNS resolver latency in seconds. Broken down by host.",
+			Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0},
+		},
+		[]string{"host"},
+	)
+
+	requestSize = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name: "rest_client_request_size_bytes",
+			Help: "Request size in bytes. Broken down by verb and host.",
+			// 64 bytes to 16MB
+			Buckets: []float64{64, 256, 512, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216},
+		},
+		[]string{"verb", "host"},
+	)
+
+	responseSize = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name: "rest_client_response_size_bytes",
+			Help: "Response size in bytes. Broken down by verb and host.",
+			// 64 bytes to 16MB
+			Buckets: []float64{64, 256, 512, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216},
+		},
+		[]string{"verb", "host"},
+	)
+
+	rateLimiterLatency = prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name:    "rest_client_rate_limiter_duration_seconds",
+			Help:    "Client side rate limiter latency in seconds. Broken down by verb, and host.",
+			Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0},
+		},
+		[]string{"verb", "host"},
+	)
+
+	requestRetry = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "rest_client_request_retries_total",
+			Help: "Number of request retries, partitioned by status code, verb, and host.",
+		},
+		[]string{"code", "verb", "host"},
+	)
 )
 
+// restMetricsEnabled gates the opt-in adapters below: they discard every
+// observation until RegisterDefaultRESTClientMetrics flips it. requestResult
+// stays on unconditionally to preserve existing behavior.
+var restMetricsEnabled atomic.Bool
+
 func init() {
 	registerClientMetrics()
 }
@@ -50,7 +114,37 @@ func registerClientMetrics() {
 
 	// register the metrics with client-go
 	clientmetrics.Register(clientmetrics.RegisterOpts{
-		RequestResult: &resultAdapter{metric: requestResult},
+		RequestResult:      &resultAdapter{metric: requestResult},
+		RequestLatency:     &latencyAdapter{metric: requestLatency},
+		ResolverLatency:    &resolverLatencyAdapter{metric: resolverLatency},
+		RequestSize:        &sizeAdapter{metric: requestSize},
+		ResponseSize:       &sizeAdapter{metric: responseSize},
+		RateLimiterLatency: &latencyAdapter{metric: rateLimiterLatency},
+		RequestRetry:       &retryAdapter{metric: requestRetry},
+	})
+}
+
+// registerDefaultsOnce makes RegisterDefaultRESTClientMetrics idempotent.
+var registerDefaultsOnce sync.Once
+
+// RegisterDefaultRESTClientMetrics registers the opt-in REST client metrics
+// (request latency, DNS resolution latency, request and response size, rate
+// limiter latency and request retries) with Registry and starts recording
+// them. Observations made before the first call are discarded.
+//
+// It is safe to call repeatedly and concurrently; only the first call has an
+// effect. It panics if any of the collectors cannot be registered.
+func RegisterDefaultRESTClientMetrics() {
+	registerDefaultsOnce.Do(func() {
+		Registry.MustRegister(
+			requestLatency,
+			resolverLatency,
+			requestSize,
+			responseSize,
+			rateLimiterLatency,
+			requestRetry,
+		)
+		restMetricsEnabled.Store(true)
 	})
 }
 
@@ -69,3 +163,51 @@ type resultAdapter struct {
 func (r *resultAdapter) Increment(_ context.Context, code, method, host string) {
 	r.metric.WithLabelValues(code, method, host).Inc()
 }
+
+// latencyAdapter records durations, in seconds, labelled by verb and URL host.
+type latencyAdapter struct {
+	metric *prometheus.HistogramVec
+}
+
+func (l *latencyAdapter) Observe(_ context.Context, verb string, u url.URL, duration time.Duration) {
+	if !restMetricsEnabled.Load() {
+		return
+	}
+	l.metric.WithLabelValues(verb, u.Host).Observe(duration.Seconds())
+}
+
+// resolverLatencyAdapter records DNS resolution durations, in seconds, labelled by host.
+type resolverLatencyAdapter struct {
+	metric *prometheus.HistogramVec
+}
+
+func (r *resolverLatencyAdapter) Observe(_ context.Context, host string, duration time.Duration) {
+	if !restMetricsEnabled.Load() {
+		return
+	}
+	r.metric.WithLabelValues(host).Observe(duration.Seconds())
+}
+
+// sizeAdapter records request or response sizes labelled by verb and host.
+type sizeAdapter struct {
+	metric *prometheus.HistogramVec
+}
+
+func (s *sizeAdapter) Observe(_ context.Context, verb string, host string, size float64) {
+	if !restMetricsEnabled.Load() {
+		return
+	}
+	s.metric.WithLabelValues(verb, host).Observe(size)
+}
+
+// retryAdapter counts request retries labelled by status code, verb and host.
+type retryAdapter struct {
+	metric *prometheus.CounterVec
+}
+
+func (r *retryAdapter) IncrementRetry(_ context.Context, code, verb, host string) {
+	if !restMetricsEnabled.Load() {
+		return
+	}
+	r.metric.WithLabelValues(code, verb, host).Inc()
+}
diff --git a/pkg/metrics/client_go_adapter_test.go b/pkg/metrics/client_go_adapter_test.go
new file mode 100644
index 0000000000..201d61968d
--- /dev/null
+++ b/pkg/metrics/client_go_adapter_test.go
@@ -0,0 +1,96 @@
+/*
+Copyright 2026 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"context"
+	"net/url"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+	clientmetrics "k8s.io/client-go/tools/metrics"
+)
+
+// optInMetricNames lists the metrics that are only exposed once
+// RegisterDefaultRESTClientMetrics has been called.
+var optInMetricNames = []string{
+	"rest_client_request_duration_seconds",
+	"rest_client_dns_resolution_duration_seconds",
+	"rest_client_request_size_bytes",
+	"rest_client_response_size_bytes",
+	"rest_client_rate_limiter_duration_seconds",
+	"rest_client_request_retries_total",
+}
+
+// observeAllRESTClientMetrics records exactly one sample through every
+// client-go metric hook installed by registerClientMetrics.
+func observeAllRESTClientMetrics() {
+	ctx := context.Background()
+	clientmetrics.RequestResult.Increment(ctx, "200", "GET", "example.com")
+	clientmetrics.RequestLatency.Observe(ctx, "GET", url.URL{Host: "example.com"}, 1*time.Second)
+	clientmetrics.ResolverLatency.Observe(ctx, "example.com", 1*time.Second)
+	clientmetrics.RequestSize.Observe(ctx, "GET", "example.com", 1024)
+	clientmetrics.ResponseSize.Observe(ctx, "GET", "example.com", 1024)
+	clientmetrics.RateLimiterLatency.Observe(ctx, "GET", url.URL{Host: "example.com"}, 1*time.Second)
+	clientmetrics.RequestRetry.IncrementRetry(ctx, "200", "GET", "example.com")
+}
+
+// gatheredSampleCounts gathers Registry and returns, keyed by metric family
+// name, the number of samples recorded across all label combinations
+// (histogram observations plus counter increments).
+func gatheredSampleCounts(t *testing.T) map[string]uint64 {
+	t.Helper()
+	mfs, err := Registry.Gather()
+	require.NoError(t, err)
+
+	counts := make(map[string]uint64, len(mfs))
+	for _, mf := range mfs {
+		var total uint64
+		for _, m := range mf.GetMetric() {
+			total += m.GetHistogram().GetSampleCount()
+			total += uint64(m.GetCounter().GetValue())
+		}
+		counts[mf.GetName()] = total
+	}
+	return counts
+}
+
+// TestRESTClientMetrics checks that rest_client_requests_total is always
+// exposed, while the remaining REST client metrics are neither exposed nor
+// recorded until RegisterDefaultRESTClientMetrics is called.
+func TestRESTClientMetrics(t *testing.T) {
+	// These observations happen while the opt-in metrics are still disabled,
+	// so only rest_client_requests_total may record them.
+	observeAllRESTClientMetrics()
+
+	counts := gatheredSampleCounts(t)
+	require.Contains(t, counts, "rest_client_requests_total", "metric rest_client_requests_total should be exposed by default")
+	for _, name := range optInMetricNames {
+		require.NotContains(t, counts, name, "metric %s should not be found before calling RegisterDefaultRESTClientMetrics", name)
+	}
+
+	RegisterDefaultRESTClientMetrics()
+	observeAllRESTClientMetrics()
+
+	counts = gatheredSampleCounts(t)
+	for _, name := range optInMetricNames {
+		require.Contains(t, counts, name, "metric %s not found", name)
+		// Exactly one sample proves the pre-registration observation was discarded.
+		require.EqualValues(t, 1, counts[name], "metric %s should only contain the sample observed after RegisterDefaultRESTClientMetrics", name)
+	}
+}