Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ require (
sigs.k8s.io/yaml v1.6.0
)

require github.com/stretchr/testify v1.11.1

require (
cel.dev/expr v0.25.1 // indirect
github.com/Masterminds/semver/v3 v3.4.0 // indirect
Expand Down
133 changes: 132 additions & 1 deletion pkg/metrics/client_go_adapter.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ package metrics

import (
"context"
"net/url"
"sync"
"sync/atomic"
"time"

"github.com/prometheus/client_golang/prometheus"
clientmetrics "k8s.io/client-go/tools/metrics"
Expand All @@ -37,8 +41,68 @@ var (
},
[]string{"code", "method", "host"},
)

requestLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_request_duration_seconds",
Help: "Request latency in seconds. Broken down by verb and host.",
Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0},
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we support native histograms from the get-go?

},
[]string{"verb", "host"},
)

resolverLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_dns_resolution_duration_seconds",
Help: "DNS resolver latency in seconds. Broken down by host.",
Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0},
},
[]string{"host"},
)

requestSize = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_request_size_bytes",
Help: "Request size in bytes. Broken down by verb and host.",
// 64 bytes to 16MB
Buckets: []float64{64, 256, 512, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216},
},
[]string{"verb", "host"},
)

responseSize = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_response_size_bytes",
Help: "Response size in bytes. Broken down by verb and host.",
// 64 bytes to 16MB
Buckets: []float64{64, 256, 512, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216},
},
[]string{"verb", "host"},
)

rateLimiterLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Name: "rest_client_rate_limiter_duration_seconds",
Help: "Client side rate limiter latency in seconds. Broken down by verb, and host.",
Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0},
},
[]string{"verb", "host"},
)

requestRetry = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "rest_client_request_retries_total",
Help: "Number of request retries, partitioned by status code, verb, and host.",
},
[]string{"code", "verb", "host"},
)
)

// restMetricsEnabled gates the opt-in adapters below, they discard observations
// until RegisterDefaultRESTClientMetrics flips it. requestResult stays on
// unconditionally to preserve existing behavior.
var restMetricsEnabled atomic.Bool
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it help readability if we used an atomic pointer that switches the entire adapterset rather than having a check in each method? Is there a no-op from client-go we can reuse?


func init() {
registerClientMetrics()
}
Expand All @@ -50,7 +114,30 @@ func registerClientMetrics() {

// register the metrics with client-go
clientmetrics.Register(clientmetrics.RegisterOpts{
RequestResult: &resultAdapter{metric: requestResult},
RequestResult: &resultAdapter{metric: requestResult},
RequestLatency: &latencyAdapter{metric: requestLatency},
ResolverLatency: &resolverLatencyAdapter{metric: resolverLatency},
RequestSize: &sizeAdapter{metric: requestSize},
ResponseSize: &sizeAdapter{metric: responseSize},
RateLimiterLatency: &latencyAdapter{metric: rateLimiterLatency},
RequestRetry: &retryAdapter{metric: requestRetry},
})
}

var registerDefaultsOnce sync.Once

// RegisterDefaultRESTClientMetrics enables the client metrics.
func RegisterDefaultRESTClientMetrics() {
registerDefaultsOnce.Do(func() {
Registry.MustRegister(
requestLatency,
resolverLatency,
requestSize,
responseSize,
rateLimiterLatency,
requestRetry,
)
restMetricsEnabled.Store(true)
})
}

Expand All @@ -69,3 +156,47 @@ type resultAdapter struct {
func (r *resultAdapter) Increment(_ context.Context, code, method, host string) {
r.metric.WithLabelValues(code, method, host).Inc()
}

type latencyAdapter struct {
metric *prometheus.HistogramVec
}

func (l *latencyAdapter) Observe(_ context.Context, verb string, u url.URL, duration time.Duration) {
if !restMetricsEnabled.Load() {
return
}
l.metric.WithLabelValues(verb, u.Host).Observe(duration.Seconds())
}

type resolverLatencyAdapter struct {
metric *prometheus.HistogramVec
}

func (r *resolverLatencyAdapter) Observe(_ context.Context, host string, duration time.Duration) {
if !restMetricsEnabled.Load() {
return
}
r.metric.WithLabelValues(host).Observe(duration.Seconds())
}

type sizeAdapter struct {
metric *prometheus.HistogramVec
}

func (r *sizeAdapter) Observe(_ context.Context, verb string, host string, size float64) {
if !restMetricsEnabled.Load() {
return
}
r.metric.WithLabelValues(verb, host).Observe(size)
}

type retryAdapter struct {
metric *prometheus.CounterVec
}

func (r *retryAdapter) IncrementRetry(_ context.Context, code, verb, host string) {
if !restMetricsEnabled.Load() {
return
}
r.metric.WithLabelValues(code, verb, host).Inc()
}
82 changes: 82 additions & 0 deletions pkg/metrics/client_go_adapter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
/*
Copyright 2026 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"context"
"net/url"
"testing"
"time"

"github.com/stretchr/testify/require"
clientmetrics "k8s.io/client-go/tools/metrics"
)

var optInMetricNames = []string{
"rest_client_request_duration_seconds",
"rest_client_dns_resolution_duration_seconds",
"rest_client_request_size_bytes",
"rest_client_response_size_bytes",
"rest_client_rate_limiter_duration_seconds",
"rest_client_request_retries_total",
}

func observeAllRESTClientMetrics() {
ctx := context.Background()

Check failure on line 39 in pkg/metrics/client_go_adapter_test.go

View workflow job for this annotation

GitHub Actions / lint

use of `context.Background` forbidden because "Use ginkgos SpecContext or go testings t.Context instead" (forbidigo)
clientmetrics.RequestResult.Increment(ctx, "200", "GET", "example.com")
clientmetrics.RequestLatency.Observe(ctx, "GET", url.URL{Host: "example.com"}, 1*time.Second)
clientmetrics.ResolverLatency.Observe(ctx, "example.com", 1*time.Second)
clientmetrics.RequestSize.Observe(ctx, "GET", "example.com", 1024)
clientmetrics.ResponseSize.Observe(ctx, "GET", "example.com", 1024)
clientmetrics.RateLimiterLatency.Observe(ctx, "GET", url.URL{Host: "example.com"}, 1*time.Second)
clientmetrics.RequestRetry.IncrementRetry(ctx, "200", "GET", "example.com")
}

func gatheredNames(t *testing.T) map[string]struct{} {
t.Helper()
mfs, err := Registry.Gather()
require.NoError(t, err)

names := make(map[string]struct{})
for _, mf := range mfs {
names[mf.GetName()] = struct{}{}
}
return names
}

func TestRESTClientMetrics(t *testing.T) {
observeAllRESTClientMetrics()

names := gatheredNames(t)
if _, ok := names["rest_client_requests_total"]; !ok {
t.Error("metric rest_client_requests_total should be exposed by default")
}
for _, name := range optInMetricNames {
_, found := names[name]
require.False(t, found, "metric %s should not be found before calling RegisterDefaultRESTClientMetrics", name)
}

RegisterDefaultRESTClientMetrics()
observeAllRESTClientMetrics()

names = gatheredNames(t)
for _, name := range optInMetricNames {
_, found := names[name]
require.True(t, found, "metric %s not found", name)
}

}

Check failure on line 82 in pkg/metrics/client_go_adapter_test.go

View workflow job for this annotation

GitHub Actions / lint

unnecessary trailing newline (whitespace)
Loading