Skip to content

Commit f79807f

Browse files
authored
Switch prometheus/client_golang to a fork with support for /metrics endpoint filtering by name[] (#13746)
<!-- Thanks for sending a pull request! Before submitting: 1. Read our CONTRIBUTING.md guide 2. Rebase your PR if it gets out of sync with main --> #### What this PR does Switches to a fork of `client_golang` that contains the PR prometheus/client_golang#1925 which supports `/metrics` endpoint handler filtering through the use of `name[]` query param. I added a test to make sure this functionality doesn't disappear in a possible future removal of this replace directive. Also updates the costattribution package as this version now has changes introduced by @duricanikolic in prometheus/client_golang#1902 #### Which issue(s) this PR fixes or relates to Fixes an internal issue #### Checklist - [x] Tests updated. - [ ] Documentation added. - [x] `CHANGELOG.md` updated - the order of entries should be `[CHANGE]`, `[FEATURE]`, `[ENHANCEMENT]`, `[BUGFIX]`. If changelog entry is not needed, please add the `changelog-not-needed` label to the PR. - [ ] [`about-versioning.md`](https://github.com/grafana/mimir/blob/main/docs/sources/mimir/configure/about-versioning.md) updated with experimental features. <!-- CURSOR_SUMMARY --> --- > [!NOTE] > Switch to a forked `client_golang` to add `/metrics` filtering via `name[]`, update cost-attribution validation and tests, adapt API usage, and bump `compress`. > > - **Server/HTTP metrics**: > - Enable metric filtering on `/metrics` via `name[]` query param (promhttp handler change). Added test `TestMetricsEndpointSupportsMetricFiltering`. > - **Dependencies**: > - Replace `github.com/prometheus/client_golang` with fork `github.com/colega/prometheus-client_golang` (pulls in filtering support) and vendor updates. > - Bump `github.com/klauspost/compress` to `v1.18.2` and vendor changes. > - **Cost attribution**: > - Simplify metric descriptor validation using `prometheus.Desc.Err()`; adjust error expectations in tests. > - **Integration tests client**: > - Adapt `LabelNames` to new `model.LabelNames` return type and convert to `[]string`. > - **Changelog**: > - Document `/metrics` filtering enhancement. > > <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit 94aa553. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup> <!-- /CURSOR_SUMMARY --> --------- Signed-off-by: Oleg Zaytsev <[email protected]>
1 parent 2aead04 commit f79807f

File tree

17 files changed

+277
-250
lines changed

17 files changed

+277
-250
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
* [ENHANCEMENT] OTLP: Add metric `cortex_distributor_otlp_requests_by_content_type_total` to track content type (json or proto) of OTLP packets. #13525
7474
* [ENHANCEMENT] OTLP: Add experimental metric `cortex_distributor_otlp_array_lengths` to better understand the layout of OTLP packets in practice. #13525
7575
* [ENHANCEMENT] Ruler: gRPC errors without details are classified as `operator` errors, and rule evaluation failures (such as duplicate labelsets) are classified as `user` errors. #13586
76+
* [ENHANCEMENT] Server: The `/metrics` endpoint now supports metrics filtering by providing one or more `name[]` query parameters. #13746
7677
* [BUGFIX] Compactor: Fix potential concurrent map writes. #13053
7778
* [BUGFIX] Query-frontend: Fix issue where queries sometimes fail with `failed to receive query result stream message: rpc error: code = Canceled desc = context canceled` if remote execution is enabled. #13084
7879
* [BUGFIX] Query-frontend: Fix issue where query stats, such as series read, did not include the parameters to the `histogram_quantile` and `histogram_fraction` functions if remote execution was enabled. #13084

go.mod

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,3 +382,7 @@ replace github.com/prometheus/alertmanager => github.com/grafana/prometheus-aler
382382
// Use Mimir fork of prometheus/otlptranslator to allow for higher velocity of upstream development,
383383
// while allowing Mimir to move at a more conservative pace.
384384
replace github.com/prometheus/otlptranslator => github.com/grafana/mimir-otlptranslator v0.0.0-20251017074411-ea1e8f863e1d
385+
386+
// Use a fork of client_golang with changes from:
387+
// - https://github.com/prometheus/client_golang/pull/1925
388+
replace github.com/prometheus/client_golang => github.com/colega/prometheus-client_golang v1.19.1-0.20251204143415-11cda2079634

go.sum

Lines changed: 65 additions & 38 deletions
Large diffs are not rendered by default.

integration/e2emimir/client.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,16 @@ func (c *Client) LabelNames(start, end time.Time, matches []string, opts ...prom
522522
defer cancel()
523523

524524
result, _, err := c.querierClient.LabelNames(ctx, matches, start, end, opts...)
525-
return result, err
525+
if err != nil {
526+
return nil, err
527+
}
528+
529+
// Convert model.LabelNames to []string
530+
names := make([]string, len(result))
531+
for i, name := range result {
532+
names[i] = string(name)
533+
}
534+
return names, nil
526535
}
527536

528537
// LabelNamesAndValues returns distinct label values per label name.

pkg/costattribution/active_tracker_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@ func TestNewActiveTracker(t *testing.T) {
3636
},
3737
"incorrect label name causes an error single label": {
3838
costAttributionLabels: costattributionmodel.Labels{{Input: "__bad_label__", Output: "__bad_label__"}},
39-
expectedErr: fmt.Errorf(`failed to create an active series tracker for tenant tenant-1: descriptor Desc{fqName: "cortex_ingester_attributed_active_series", help: "The total number of active series per user and attribution.", constLabels: {}, variableLabels: {__bad_label__,tenant}} is invalid: "__bad_label__" is not a valid label name for metric "cortex_ingester_attributed_active_series"`),
39+
expectedErr: fmt.Errorf(`failed to create an active series tracker for tenant tenant-1: "__bad_label__" is not a valid label name for metric "cortex_ingester_attributed_active_series"`),
4040
},
4141
"incorrect label name causes an error multiple labels": {
4242
costAttributionLabels: costattributionmodel.Labels{
4343
{Input: "good_label", Output: "good_label"},
4444
{Input: "__bad_label__", Output: "__bad_label__"},
4545
},
46-
expectedErr: fmt.Errorf(`failed to create an active series tracker for tenant tenant-1: descriptor Desc{fqName: "cortex_ingester_attributed_active_series", help: "The total number of active series per user and attribution.", constLabels: {}, variableLabels: {good_label,__bad_label__,tenant}} is invalid: "__bad_label__" is not a valid label name for metric "cortex_ingester_attributed_active_series"`),
46+
expectedErr: fmt.Errorf(`failed to create an active series tracker for tenant tenant-1: "__bad_label__" is not a valid label name for metric "cortex_ingester_attributed_active_series"`),
4747
},
4848
}
4949

pkg/costattribution/metrics.go

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ import (
1111
// descriptor wraps a prometheus.Desc with additional metadata for metric validation and creation.
1212
// This wrapper is needed because prometheus.Desc doesn't expose important information about the
1313
// descriptor itself, such as metric name, label names, and a possible error obtained during the
14-
// descriptor construction. The first two are accessible via the name and labels fields, while
15-
// construction error can be obtained by calling validate().
14+
// descriptor construction. The first two are accessible via the name and labels fields.
1615
type descriptor struct {
1716
desc *prometheus.Desc
1817
name string
@@ -23,30 +22,15 @@ type descriptor struct {
2322
// It returns an error if a creation of the underlying prometheus.Desc is not successful, for example when the metric name
2423
// or any of the label names don't conform to Prometheus naming conventions.
2524
func newDescriptor(name string, help string, labels []string, constLabels prometheus.Labels) (*descriptor, error) {
26-
desc := &descriptor{
27-
desc: prometheus.NewDesc(name, help, labels, constLabels),
25+
desc := prometheus.NewDesc(name, help, labels, constLabels)
26+
if desc.Err() != nil {
27+
return nil, desc.Err()
28+
}
29+
return &descriptor{
30+
desc: desc,
2831
name: name,
2932
labels: labels,
30-
}
31-
if err := desc.validate(); err != nil {
32-
return nil, err
33-
}
34-
return desc, nil
35-
}
36-
37-
// validate checks whether the underlying prometheus.Desc is valid.
38-
//
39-
// A prometheus.Desc may contain an internal error if created with invalid
40-
// metric or label names. Since this error is unexported, validate detects
41-
// such cases and returns the underlying error if present. Otherwise, it
42-
// returns nil.
43-
func (d *descriptor) validate() error {
44-
reg := prometheus.NewRegistry()
45-
if err := reg.Register(d); err != nil {
46-
return err
47-
}
48-
49-
return nil
33+
}, nil
5034
}
5135

5236
// Describe implements the prometheus.Collector interface by sending this descriptor to the provided channel.

pkg/costattribution/metrics_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ func TestNewDescriptor(t *testing.T) {
3838
help: "Metric with invalid label",
3939
labels: []string{"__bad_label__"},
4040
constLabels: nil,
41-
expectedErr: fmt.Errorf(`descriptor Desc{fqName: "test_metric", help: "Metric with invalid label", constLabels: {}, variableLabels: {__bad_label__}} is invalid: "__bad_label__" is not a valid label name for metric "test_metric"`),
41+
expectedErr: fmt.Errorf(`"__bad_label__" is not a valid label name for metric "test_metric"`),
4242
},
4343
}
4444

pkg/costattribution/sample_tracker_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,14 @@ func TestNewSampleTracker(t *testing.T) {
3737
},
3838
"incorrect label name causes an error single label": {
3939
costAttributionLabels: costattributionmodel.Labels{{Input: "__bad_label__", Output: "__bad_label__"}},
40-
expectedErr: fmt.Errorf(`failed to create a sample tracker for tenant tenant-1: descriptor Desc{fqName: "cortex_distributor_received_attributed_samples_total", help: "The total number of samples that were received per attribution.", constLabels: {}, variableLabels: {__bad_label__,tenant}} is invalid: "__bad_label__" is not a valid label name for metric "cortex_distributor_received_attributed_samples_total"`),
40+
expectedErr: fmt.Errorf(`failed to create a sample tracker for tenant tenant-1: "__bad_label__" is not a valid label name for metric "cortex_distributor_received_attributed_samples_total"`),
4141
},
4242
"incorrect label name causes an error multiple labels": {
4343
costAttributionLabels: costattributionmodel.Labels{
4444
{Input: "good_label", Output: "good_label"},
4545
{Input: "__bad_label__", Output: "__bad_label__"},
4646
},
47-
expectedErr: fmt.Errorf(`failed to create a sample tracker for tenant tenant-1: descriptor Desc{fqName: "cortex_distributor_received_attributed_samples_total", help: "The total number of samples that were received per attribution.", constLabels: {}, variableLabels: {good_label,__bad_label__,tenant}} is invalid: "__bad_label__" is not a valid label name for metric "cortex_distributor_received_attributed_samples_total"`),
47+
expectedErr: fmt.Errorf(`failed to create a sample tracker for tenant tenant-1: "__bad_label__" is not a valid label name for metric "cortex_distributor_received_attributed_samples_total"`),
4848
},
4949
}
5050

pkg/mimir/mimir_test.go

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@ import (
1212
"flag"
1313
"fmt"
1414
"io"
15+
"maps"
1516
"net"
1617
"net/http"
1718
"os"
1819
"path/filepath"
20+
"slices"
1921
"strconv"
2022
"strings"
2123
"syscall"
@@ -283,6 +285,66 @@ func TestMimirServerShutdownWithActivityTrackerEnabled(t *testing.T) {
283285
}
284286
}
285287

288+
func TestMetricsEndpointSupportsMetricFiltering(t *testing.T) {
289+
// This test checks that our /metrics endpoint handler supports metric filtering through the usage of name[] query param.
290+
// This is added to prometheus/client_golang in https://github.com/prometheus/client_golang/pull/1925,
291+
// but not merged yet so we use a replace directive on the module to bring this functionality.
292+
// This test check that we haven't lost that replace directive (and functionality).
293+
//
294+
// We target OverridesExporter because it's the target with least dependencies and easier to set up.
295+
args := []string{
296+
"-target=" + OverridesExporter,
297+
"-server.http-listen-port=0",
298+
"-server.grpc-listen-port=0",
299+
}
300+
var cfg Config
301+
fs := flag.NewFlagSet("test", flag.PanicOnError)
302+
cfg.RegisterFlags(fs, log.NewNopLogger())
303+
require.NoError(t, fs.Parse(args))
304+
require.NoError(t, cfg.Validate(log.NewNopLogger()))
305+
306+
c, err := New(cfg, prometheus.NewPedanticRegistry())
307+
require.NoError(t, err)
308+
309+
serviceMap, err := c.ModuleManager.InitModuleServices(cfg.Target...)
310+
require.NoError(t, err)
311+
require.NotNil(t, serviceMap)
312+
313+
servs := slices.Collect(maps.Values(serviceMap))
314+
sm, err := services.NewManager(servs...)
315+
require.NoError(t, err)
316+
317+
assert.NoError(t, sm.StartAsync(t.Context()))
318+
t.Cleanup(sm.StopAsync)
319+
320+
{
321+
// One metric
322+
res, err := http.Get(fmt.Sprintf("http://%s/metrics?name[]=go_info", c.Server.HTTPListenAddr()))
323+
require.NoError(t, err)
324+
defer res.Body.Close()
325+
assert.Equal(t, http.StatusOK, res.StatusCode)
326+
body, err := io.ReadAll(res.Body)
327+
require.NoError(t, err)
328+
assert.Contains(t, string(body), "go_info")
329+
assert.NotContains(t, string(body), "deprecated_flags_inuse_total")
330+
require.Equal(t, 1, strings.Count(string(body), "HELP"))
331+
}
332+
333+
{
334+
// Two metrics
335+
res, err := http.Get(fmt.Sprintf("http://%s/metrics?name[]=go_info&name[]=deprecated_flags_inuse_total", c.Server.HTTPListenAddr()))
336+
require.NoError(t, err)
337+
defer res.Body.Close()
338+
assert.Equal(t, http.StatusOK, res.StatusCode)
339+
body, err := io.ReadAll(res.Body)
340+
require.NoError(t, err)
341+
assert.Contains(t, string(body), "go_info")
342+
assert.Contains(t, string(body), "deprecated_flags_inuse_total")
343+
assert.NotContains(t, string(body), "go_gc_duration_seconds")
344+
require.Equal(t, 2, strings.Count(string(body), "HELP"))
345+
}
346+
}
347+
286348
func TestConfigValidation(t *testing.T) {
287349
for _, tc := range []struct {
288350
name string

vendor/github.com/prometheus/client_golang/api/client.go

Lines changed: 6 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)