From 1a71a2e18b917fcaaaffafa94aec686268c0903f Mon Sep 17 00:00:00 2001 From: Artyom Antonov Date: Thu, 10 Jul 2025 04:28:17 +0500 Subject: [PATCH 1/5] refactor tag count querying, use it in autocomplete --- autocomplete/autocomplete.go | 87 ++++++++++++----- deploy/doc/config.md | 2 +- doc/config.md | 2 +- finder/tagged.go | 151 +++++------------------------ finder/tagged_test.go | 15 --- finder/tags_count_querier.go | 179 +++++++++++++++++++++++++++++++++++ 6 files changed, 269 insertions(+), 167 deletions(-) create mode 100644 finder/tags_count_querier.go diff --git a/autocomplete/autocomplete.go b/autocomplete/autocomplete.go index 440fb122..36c3d472 100644 --- a/autocomplete/autocomplete.go +++ b/autocomplete/autocomplete.go @@ -71,13 +71,28 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { } } -func (h *Handler) requestExpr(r *http.Request) (*where.Where, *where.Where, map[string]bool, error) { - f := r.Form["expr"] - expr := make([]string, 0, len(f)) +func getTagCountQuerier(config *config.Config, opts clickhouse.Options) *finder.TagCountQuerier { + var tcq *finder.TagCountQuerier = nil + if config.ClickHouse.TagsCountTable != "" { + tcq = finder.NewTagCountQuerier( + config.ClickHouse.URL, + config.ClickHouse.TagsCountTable, + opts, + config.FeatureFlags.UseCarbonBehavior, + config.FeatureFlags.DontMatchMissingTags, + config.ClickHouse.TaggedUseDaily, + ) + } + return tcq +} - for i := 0; i < len(f); i++ { - if f[i] != "" { - expr = append(expr, f[i]) +func (h *Handler) requestExpr(r *http.Request, tcq *finder.TagCountQuerier, from, until int64) (*where.Where, *where.Where, map[string]bool, error) { + formExpr := r.Form["expr"] + expr := make([]string, 0, len(formExpr)) + + for i := 0; i < len(formExpr); i++ { + if formExpr[i] != "" { + expr = append(expr, formExpr[i]) } } @@ -95,6 +110,15 @@ func (h *Handler) requestExpr(r *http.Request) (*where.Where, *where.Where, map[ return wr, pw, usedTags, err } + if tcq != nil { + tagValuesCosts, err := tcq.GetCostsFromCountTable(r.Context(), terms, from, until) + if err != nil { + return wr, pw, usedTags, err + } + finder.SetCosts(terms, tagValuesCosts) + } + finder.SortTaggedTermsByCost(terms) + wr, pw, err = finder.TaggedWhere(terms, h.config.FeatureFlags.UseCarbonBehavior, h.config.FeatureFlags.DontMatchMissingTags) if err != nil { return wr, pw, usedTags, err @@ -214,6 +238,7 @@ func (h *Handler) ServeTags(w http.ResponseWriter, r *http.Request) { queueFail bool queueDuration time.Duration findCache bool + opts clickhouse.Options ) username := r.Header.Get("X-Forwarded-User") @@ -290,7 +315,21 @@ func (h *Handler) ServeTags(w http.ResponseWriter, r *http.Request) { } } - wr, pw, usedTags, err := h.requestExpr(r) + opts = clickhouse.Options{ + TLSConfig: h.config.ClickHouse.TLSConfig, + Timeout: h.config.ClickHouse.IndexTimeout, + ConnectTimeout: h.config.ClickHouse.ConnectTimeout, + CheckRequestProgress: h.config.FeatureFlags.LogQueryProgress, + ProgressSendingInterval: h.config.ClickHouse.ProgressSendingInterval, + } + + wr, pw, usedTags, err := h.requestExpr( + r, + getTagCountQuerier(h.config, opts), + start.AddDate(0, 0, -h.config.ClickHouse.TaggedAutocompleDays).Unix(), + start.Unix(), + ) + if err != nil { status = http.StatusBadRequest http.Error(w, err.Error(), status) @@ -366,13 +405,7 @@ func (h *Handler) ServeTags(w http.ResponseWriter, r *http.Request) { scope.WithTable(r.Context(), h.config.ClickHouse.TaggedTable), h.config.ClickHouse.URL, sql, - clickhouse.Options{ - TLSConfig: h.config.ClickHouse.TLSConfig, - Timeout: h.config.ClickHouse.IndexTimeout, - ConnectTimeout: h.config.ClickHouse.ConnectTimeout, - CheckRequestProgress: h.config.FeatureFlags.LogQueryProgress, - ProgressSendingInterval: h.config.ClickHouse.ProgressSendingInterval, - }, + opts, nil, ) @@ -490,6 +523,7 @@ func (h *Handler) ServeValues(w http.ResponseWriter, r *http.Request) { queueFail bool queueDuration time.Duration findCache bool + opts clickhouse.Options ) username := r.Header.Get("X-Forwarded-User") @@ -567,8 +601,23 @@ func (h *Handler) ServeValues(w http.ResponseWriter, r *http.Request) { } } + opts = clickhouse.Options{ + TLSConfig: h.config.ClickHouse.TLSConfig, + Timeout: h.config.ClickHouse.IndexTimeout, + ConnectTimeout: h.config.ClickHouse.ConnectTimeout, + CheckRequestProgress: h.config.FeatureFlags.LogQueryProgress, + ProgressSendingInterval: h.config.ClickHouse.ProgressSendingInterval, + } + if !findCache { - wr, pw, usedTags, err := h.requestExpr(r) + + wr, pw, usedTags, err := h.requestExpr( + r, + getTagCountQuerier(h.config, opts), + start.AddDate(0, 0, -h.config.ClickHouse.TaggedAutocompleDays).Unix(), + start.Unix(), + ) + if err == finder.ErrCostlySeriesByTag { status = http.StatusForbidden http.Error(w, err.Error(), status) @@ -640,13 +689,7 @@ func (h *Handler) ServeValues(w http.ResponseWriter, r *http.Request) { scope.WithTable(r.Context(), h.config.ClickHouse.TaggedTable), h.config.ClickHouse.URL, sql, - clickhouse.Options{ - TLSConfig: h.config.ClickHouse.TLSConfig, - Timeout: h.config.ClickHouse.IndexTimeout, - ConnectTimeout: h.config.ClickHouse.ConnectTimeout, - CheckRequestProgress: h.config.FeatureFlags.LogQueryProgress, - ProgressSendingInterval: h.config.ClickHouse.ProgressSendingInterval, - }, + opts, nil, ) diff --git a/deploy/doc/config.md b/deploy/doc/config.md index 0709adc8..72de4b12 100644 --- a/deploy/doc/config.md +++ b/deploy/doc/config.md @@ -225,4 +225,4 @@ Here we additionally create a materialized view to automatically save the quanti graphite-clickhouse will query this table when it tries to decide which tag should be used when querying graphite_tagged table. Overall using this parameter will somewhat increase writing load but can improve reading tagged metrics greatly in some cases. -Note that this option only works for terms with '=' operator in them. +Note that this option only works for terms with '=' operator in them. Using it will also override tag costs that were set manually with tagged-costs option. diff --git a/doc/config.md b/doc/config.md index f0416297..6ff16c8e 100644 --- a/doc/config.md +++ b/doc/config.md @@ -228,7 +228,7 @@ Here we additionally create a materialized view to automatically save the quanti graphite-clickhouse will query this table when it tries to decide which tag should be used when querying graphite_tagged table. Overall using this parameter will somewhat increase writing load but can improve reading tagged metrics greatly in some cases. -Note that this option only works for terms with '=' operator in them. +Note that this option only works for terms with '=' operator in them. Using it will also override tag costs that were set manually with tagged-costs option. ```toml [common] diff --git a/finder/tagged.go b/finder/tagged.go index b3069210..d81a146a 100644 --- a/finder/tagged.go +++ b/finder/tagged.go @@ -6,7 +6,6 @@ import ( "fmt" "net/http" "sort" - "strconv" "strings" "github.com/lomik/graphite-clickhouse/config" @@ -79,10 +78,10 @@ func (s TaggedTermList) Less(i, j int) bool { type TaggedFinder struct { url string // clickhouse dsn table string // graphite_tag table - tag1CountTable string // table that helps to choose the most optimal Tag1 + tcq *TagCountQuerier // An object for querying tag weights from clickhouse. See doc/config.md for details. absKeepEncoded bool // Abs returns url encoded value. For queries from prometheus opts clickhouse.Options // clickhouse query timeout - taggedCosts map[string]*config.Costs // costs for taggs (sor tune index search) + configuredTagCosts map[string]*config.Costs // costs for taggs (sor tune index search) dailyEnabled bool useCarbonBehavior bool dontMatchMissingTags bool @@ -92,19 +91,30 @@ type TaggedFinder struct { } func NewTagged(url string, table, tag1CountTable string, dailyEnabled, useCarbonBehavior, dontMatchMissingTags, absKeepEncoded bool, opts clickhouse.Options, taggedCosts map[string]*config.Costs) *TaggedFinder { - return &TaggedFinder{ + fnd := &TaggedFinder{ url: url, table: table, - tag1CountTable: tag1CountTable, + tcq: nil, absKeepEncoded: absKeepEncoded, opts: opts, - taggedCosts: taggedCosts, + configuredTagCosts: taggedCosts, dailyEnabled: dailyEnabled, useCarbonBehavior: useCarbonBehavior, dontMatchMissingTags: dontMatchMissingTags, metricMightExists: true, stats: make([]metrics.FinderStat, 0), } + if tag1CountTable != "" { + fnd.tcq = NewTagCountQuerier( + url, + tag1CountTable, + opts, + useCarbonBehavior, + dontMatchMissingTags, + dailyEnabled, + ) + } + return fnd } func (term *TaggedTerm) concat() string { @@ -606,17 +616,18 @@ func (t *TaggedFinder) PrepareTaggedTerms(ctx context.Context, cfg *config.Confi return nil, err } - if t.tag1CountTable != "" { - err = t.SetCostsFromCountTable(ctx, terms, from, until) + var tagCounts map[string]*config.Costs = nil + if t.tcq != nil { + tagCounts, err = t.tcq.GetCostsFromCountTable(ctx, terms, from, until) if err != nil { return nil, err } } - // Set costs from count table only if it has all tag-value pairs contained in the seriesByTag query (t.metricMightExist == true) - // or if tagged costs were set in the config file and t.metricMightExist == false - if t.metricMightExists || len(t.taggedCosts) != 0 { - SetCosts(terms, t.taggedCosts) + if tagCounts != nil { + SetCosts(terms, tagCounts) + } else if len(t.configuredTagCosts) != 0 { + SetCosts(terms, t.configuredTagCosts) } SortTaggedTermsByCost(terms) @@ -659,79 +670,6 @@ func SortTaggedTermsByCost(terms []TaggedTerm) { }) } -func (t *TaggedFinder) SetCostsFromCountTable(ctx context.Context, terms []TaggedTerm, from int64, until int64) error { - w := where.New() - eqTermCount := 0 - - for i := 0; i < len(terms); i++ { - if terms[i].Op == TaggedTermEq && !terms[i].HasWildcard && terms[i].Value != "" { - sqlTerm, err := TaggedTermWhere1(&terms[i], t.useCarbonBehavior, t.dontMatchMissingTags) - if err != nil { - return err - } - - w.Or(sqlTerm) - - eqTermCount++ - } - } - - if w.SQL() == "" { - return nil - } - - if t.dailyEnabled { - w.Andf( - "Date >= '%s' AND Date <= '%s'", - date.FromTimestampToDaysFormat(from), - date.UntilTimestampToDaysFormat(until), - ) - } else { - w.Andf( - "Date >= '%s'", - date.FromTimestampToDaysFormat(from), - ) - } - - sql := fmt.Sprintf("SELECT Tag1, sum(Count) as cnt FROM %s %s GROUP BY Tag1 FORMAT TabSeparatedRaw", t.tag1CountTable, w.SQL()) - - var err error - - t.stats = append(t.stats, metrics.FinderStat{}) - stat := &t.stats[len(t.stats)-1] - stat.Table = t.tag1CountTable - - t.body, stat.ChReadRows, stat.ChReadBytes, err = clickhouse.Query(scope.WithTable(ctx, t.tag1CountTable), t.url, sql, t.opts, nil) - if err != nil { - return err - } - - rows := t.List() - - // create cost var to validate CH response without writing to t.taggedCosts - var costs map[string]*config.Costs - - costs, err = chResultToCosts(rows) - if err != nil { - return err - } - - // The metric does not exist if the response has less rows - // than there were tags with '=' op in the initial request - // This is due to each tag-value pair of a metric being written - // exactly one time as Tag1 - if len(rows) < eqTermCount { - t.body = []byte{} - t.metricMightExists = false - - return nil - } - - t.taggedCosts = costs - - return nil -} - func SetCosts(terms []TaggedTerm, costs map[string]*config.Costs) { for i := 0; i < len(terms); i++ { if cost, ok := costs[terms[i].Key]; ok { @@ -739,46 +677,3 @@ func SetCosts(terms []TaggedTerm, costs map[string]*config.Costs) { } } } - -func chResultToCosts(body [][]byte) (map[string]*config.Costs, error) { - costs := make(map[string]*config.Costs, 0) - - for i := 0; i < len(body); i++ { - s := stringutils.UnsafeString(body[i]) - - tag, val, count, err := parseTag1CountRow(s) - if err != nil { - return nil, fmt.Errorf("failed to parse result from clickhouse while querying for tag costs: %s", err.Error()) - } - - if costs[tag] == nil { - costs[tag] = &config.Costs{Cost: nil, ValuesCost: make(map[string]int, 0)} - } - - costs[tag].ValuesCost[val] = count - } - - return costs, nil -} - -func parseTag1CountRow(s string) (string, string, int, error) { - var ( - tag1, count, tag, val string - cnt, n int - err error - ) - - if tag1, count, n = stringutils.Split2(s, "\t"); n != 2 { - return "", "", 0, fmt.Errorf("no tag count") - } - - if tag, val, n = stringutils.Split2(tag1, "="); n != 2 { - return "", "", 0, fmt.Errorf("no '=' in Tag1") - } - - if cnt, err = strconv.Atoi(count); err != nil { - return "", "", 0, fmt.Errorf("can't convert count to int") - } - - return tag, val, cnt, nil -} diff --git a/finder/tagged_test.go b/finder/tagged_test.go index 792b2f94..0284502a 100644 --- a/finder/tagged_test.go +++ b/finder/tagged_test.go @@ -625,7 +625,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { testName, query, sql string, response *chtest.TestResponse, expected []TaggedTerm, - metricMightExist bool, expectedErr error, useTagCostsFromConfig bool, ) { @@ -668,7 +667,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { } assert.NoError(err) - assert.Equal(metricMightExist, taggedFinder.metricMightExists, testName+", metricMightExist") length := len(expected) if length < len(terms) { @@ -701,7 +699,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 10, NonDefaultCost: true}, {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true}, }, - true, nil, false, ) @@ -721,7 +718,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 0, NonDefaultCost: false}, {Op: TaggedTermEq, Key: "key", Value: "value", Cost: 0, NonDefaultCost: false}, }, - false, nil, false, ) @@ -741,7 +737,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 0, NonDefaultCost: false}, {Op: TaggedTermEq, Key: "key", Value: "value", Cost: 0, NonDefaultCost: false}, }, - false, nil, false, ) @@ -761,7 +756,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true}, {Op: TaggedTermEq, Key: "dc", Value: "*", Cost: 0, NonDefaultCost: false, HasWildcard: true}, }, - true, nil, false, ) @@ -782,7 +776,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermMatch, Key: "status", Value: "^o.*", Cost: 0}, {Op: TaggedTermMatch, Key: "key", Value: "val.*", Cost: 0}, }, - true, nil, false, ) @@ -803,7 +796,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermNe, Key: "status", Value: "on", Cost: 0}, {Op: TaggedTermNe, Key: "key", Value: "value", Cost: 0}, }, - true, nil, false, ) @@ -824,7 +816,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermNotMatch, Key: "status", Value: "^o.*", Cost: 0}, {Op: TaggedTermNotMatch, Key: "key", Value: "val.*", Cost: 0}, }, - true, nil, false, ) @@ -839,7 +830,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermMatch, Key: "dc", Value: "west", Cost: 0}, {Op: TaggedTermMatch, Key: "key", Value: "^val", Cost: 0}, }, - true, nil, false, ) @@ -859,7 +849,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 10, NonDefaultCost: true}, {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true}, }, - true, nil, false, ) @@ -879,7 +868,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true}, {Op: TaggedTermEq, Key: "__name__", Value: "load.avg", Cost: 10000, NonDefaultCost: true}, }, - true, nil, false, ) @@ -899,7 +887,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 0, NonDefaultCost: false}, {Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 0, NonDefaultCost: false}, }, - false, nil, false, ) @@ -915,7 +902,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { Body: []byte("broken_response"), }, nil, - true, fmt.Errorf("failed to parse result from clickhouse while querying for tag costs: no tag count"), false, ) @@ -935,7 +921,6 @@ func TestParseSeriesByTagWithCostsFromCountTable(t *testing.T) { {Op: TaggedTermEq, Key: "__name__", Value: "high_cost", Cost: 70, NonDefaultCost: true}, {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100, NonDefaultCost: true}, }, - false, nil, true, ) diff --git a/finder/tags_count_querier.go b/finder/tags_count_querier.go new file mode 100644 index 00000000..d0ed93bb --- /dev/null +++ b/finder/tags_count_querier.go @@ -0,0 +1,179 @@ +package finder + +import ( + "bytes" + "context" + "fmt" + "strconv" + + "github.com/lomik/graphite-clickhouse/config" + "github.com/lomik/graphite-clickhouse/helper/clickhouse" + "github.com/lomik/graphite-clickhouse/helper/date" + "github.com/lomik/graphite-clickhouse/metrics" + "github.com/lomik/graphite-clickhouse/pkg/scope" + "github.com/lomik/graphite-clickhouse/pkg/where" + "github.com/msaf1980/go-stringutils" +) + +type TagCountQuerier struct { + url string + table string + opts clickhouse.Options + useCarbonBehavior bool + dontMatchMissingTags bool + dailyEnabled bool + body []byte + stats []metrics.FinderStat +} + +func NewTagCountQuerier(url, table string, opts clickhouse.Options, useCarbonBehavior, dontMatchMissingTags, dailyEnabled bool) *TagCountQuerier { + return &TagCountQuerier{ + url: url, + table: table, + opts: opts, + useCarbonBehavior: useCarbonBehavior, + dontMatchMissingTags: dontMatchMissingTags, + dailyEnabled: dailyEnabled, + } +} + +func (tcq *TagCountQuerier) GetCostsFromCountTable(ctx context.Context, terms []TaggedTerm, from int64, until int64) (map[string]*config.Costs, error) { + w := where.New() + eqTermCount := 0 + + for i := 0; i < len(terms); i++ { + if terms[i].Op == TaggedTermEq && !terms[i].HasWildcard && terms[i].Value != "" { + sqlTerm, err := TaggedTermWhere1(&terms[i], tcq.useCarbonBehavior, tcq.dontMatchMissingTags) + if err != nil { + return nil, err + } + + w.Or(sqlTerm) + + eqTermCount++ + } + } + + if w.SQL() == "" { + return nil, nil + } + + if tcq.dailyEnabled { + w.Andf( + "Date >= '%s' AND Date <= '%s'", + date.FromTimestampToDaysFormat(from), + date.UntilTimestampToDaysFormat(until), + ) + } else { + w.Andf( + "Date >= '%s'", + date.FromTimestampToDaysFormat(from), + ) + } + + sql := fmt.Sprintf("SELECT Tag1, sum(Count) as cnt FROM %s %s GROUP BY Tag1 FORMAT TabSeparatedRaw", tcq.table, w.SQL()) + + var err error + + tcq.stats = append(tcq.stats, metrics.FinderStat{}) + stat := &tcq.stats[len(tcq.stats)-1] + stat.Table = tcq.table + + tcq.body, stat.ChReadRows, stat.ChReadBytes, err = clickhouse.Query(scope.WithTable(ctx, tcq.table), tcq.url, sql, tcq.opts, nil) + if err != nil { + return nil, err + } + + rows := tcq.List() + + // create cost var to validate CH response without writing to t.taggedCosts + var costs map[string]*config.Costs + + costs, err = chResultToCosts(rows) + if err != nil { + return nil, err + } + + // The metric does not exist if the response has less rows + // than there were tags with '=' op in the initial request + // This is due to each tag-value pair of a metric being written + // exactly one time as Tag1 + if len(rows) < eqTermCount { + tcq.body = []byte{} + return nil, nil + } + + return costs, nil +} + +func chResultToCosts(body [][]byte) (map[string]*config.Costs, error) { + costs := make(map[string]*config.Costs, 0) + + for i := 0; i < len(body); i++ { + s := stringutils.UnsafeString(body[i]) + + tag, val, count, err := parseTag1CountRow(s) + if err != nil { + return nil, fmt.Errorf("failed to parse result from clickhouse while querying for tag costs: %s", err.Error()) + } + + if costs[tag] == nil { + costs[tag] = &config.Costs{Cost: nil, ValuesCost: make(map[string]int, 0)} + } + + costs[tag].ValuesCost[val] = count + } + + return costs, nil +} + +func parseTag1CountRow(s string) (string, string, int, error) { + var ( + tag1, count, tag, val string + cnt, n int + err error + ) + + if tag1, count, n = stringutils.Split2(s, "\t"); n != 2 { + return "", "", 0, fmt.Errorf("no tag count") + } + + if tag, val, n = stringutils.Split2(tag1, "="); n != 2 { + return "", "", 0, fmt.Errorf("no '=' in Tag1") + } + + if cnt, err = strconv.Atoi(count); err != nil { + return "", "", 0, fmt.Errorf("can't convert count to int") + } + + return tag, val, cnt, nil +} + +func (t *TagCountQuerier) List() [][]byte { + if t.body == nil { + return [][]byte{} + } + + rows := bytes.Split(t.body, []byte{'\n'}) + + skip := 0 + + for i := 0; i < len(rows); i++ { + if len(rows[i]) == 0 { + skip++ + continue + } + + if skip > 0 { + rows[i-skip] = rows[i] + } + } + + rows = rows[:len(rows)-skip] + + return rows +} + +func (tcq *TagCountQuerier) Stats() []metrics.FinderStat { + return tcq.stats +} From 30d9c4c492d89efd84b126e2860422283b87e687 Mon Sep 17 00:00:00 2001 From: Artyom Antonov Date: Thu, 10 Jul 2025 13:40:36 +0500 Subject: [PATCH 2/5] run linter --- autocomplete/autocomplete.go | 4 +++- finder/tagged.go | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/autocomplete/autocomplete.go b/autocomplete/autocomplete.go index 36c3d472..2b7ed1f0 100644 --- a/autocomplete/autocomplete.go +++ b/autocomplete/autocomplete.go @@ -83,6 +83,7 @@ func getTagCountQuerier(config *config.Config, opts clickhouse.Options) *finder. config.ClickHouse.TaggedUseDaily, ) } + return tcq } @@ -115,8 +116,10 @@ func (h *Handler) requestExpr(r *http.Request, tcq *finder.TagCountQuerier, from if err != nil { return wr, pw, usedTags, err } + finder.SetCosts(terms, tagValuesCosts) } + finder.SortTaggedTermsByCost(terms) wr, pw, err = finder.TaggedWhere(terms, h.config.FeatureFlags.UseCarbonBehavior, h.config.FeatureFlags.DontMatchMissingTags) @@ -610,7 +613,6 @@ func (h *Handler) ServeValues(w http.ResponseWriter, r *http.Request) { } if !findCache { - wr, pw, usedTags, err := h.requestExpr( r, getTagCountQuerier(h.config, opts), diff --git a/finder/tagged.go b/finder/tagged.go index d81a146a..1753fbd0 100644 --- a/finder/tagged.go +++ b/finder/tagged.go @@ -114,6 +114,7 @@ func NewTagged(url string, table, tag1CountTable string, dailyEnabled, useCarbon dailyEnabled, ) } + return fnd } From db816816c4c4df79b841ca90985da9440bc135c0 Mon Sep 17 00:00:00 2001 From: Artyom Antonov Date: Thu, 17 Jul 2025 16:35:41 +0500 Subject: [PATCH 3/5] dont query for costs if len(terms) < 2 --- autocomplete/autocomplete.go | 6 +++++- finder/tags_count_querier.go | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/autocomplete/autocomplete.go b/autocomplete/autocomplete.go index 2b7ed1f0..d33c4885 100644 --- a/autocomplete/autocomplete.go +++ b/autocomplete/autocomplete.go @@ -116,8 +116,12 @@ func (h *Handler) requestExpr(r *http.Request, tcq *finder.TagCountQuerier, from if err != nil { return wr, pw, usedTags, err } + if tagValuesCosts != nil { + finder.SetCosts(terms, tagValuesCosts) + } else if len(h.config.ClickHouse.TaggedCosts) != 0 { + finder.SetCosts(terms, h.config.ClickHouse.TaggedCosts) + } - finder.SetCosts(terms, tagValuesCosts) } finder.SortTaggedTermsByCost(terms) diff --git a/finder/tags_count_querier.go b/finder/tags_count_querier.go index d0ed93bb..9652f918 100644 --- a/finder/tags_count_querier.go +++ b/finder/tags_count_querier.go @@ -38,6 +38,10 @@ func NewTagCountQuerier(url, table string, opts clickhouse.Options, useCarbonBeh } func (tcq *TagCountQuerier) GetCostsFromCountTable(ctx context.Context, terms []TaggedTerm, from int64, until int64) (map[string]*config.Costs, error) { + if len(terms) < 2 { + return nil, nil + } + w := where.New() eqTermCount := 0 From 1d8a4a6f5489a70a8b368086cd4c1d990aacb4b8 Mon Sep 17 00:00:00 2001 From: Artyom Antonov Date: Thu, 17 Jul 2025 19:24:47 +0500 Subject: [PATCH 4/5] add tests --- autocomplete/autocomplete.go | 2 +- autocomplete/autocomplete_test.go | 781 ++++++++++++++++++++++++++++-- 2 files changed, 752 insertions(+), 31 deletions(-) diff --git a/autocomplete/autocomplete.go b/autocomplete/autocomplete.go index d33c4885..77a1ccc3 100644 --- a/autocomplete/autocomplete.go +++ b/autocomplete/autocomplete.go @@ -271,7 +271,7 @@ func (h *Handler) ServeTags(w http.ResponseWriter, r *http.Request) { limiter.SendDuration(queueDuration.Milliseconds()) metrics.SendFindMetrics(metrics.TagsRequestMetric, status, dMS, 0, h.config.Metrics.ExtendedStat, metricsCount) - if !findCache && chReadRows != 0 && chReadBytes != 0 { + if !findCache && chReadRows > 0 && chReadBytes > 0 { errored := status != http.StatusOK && status != http.StatusNotFound metrics.SendQueryRead(metrics.AutocompleteQMetric, 0, 0, dMS, metricsCount, readBytes, chReadRows, chReadBytes, errored) } diff --git a/autocomplete/autocomplete_test.go b/autocomplete/autocomplete_test.go index 561340ee..91da2fb4 100644 --- a/autocomplete/autocomplete_test.go +++ b/autocomplete/autocomplete_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/lomik/graphite-clickhouse/config" + "github.com/lomik/graphite-clickhouse/helper/date" chtest "github.com/lomik/graphite-clickhouse/helper/tests/clickhouse" "github.com/lomik/graphite-clickhouse/metrics" "github.com/stretchr/testify/assert" @@ -30,7 +31,7 @@ type testStruct struct { func testResponce(t *testing.T, step int, h *Handler, tt *testStruct, wantCachedFind string) { w := httptest.NewRecorder() - h.ServeValues(w, tt.request) + h.ServeHTTP(w, tt.request) s := w.Body.String() @@ -49,7 +50,7 @@ func testResponce(t *testing.T, step int, h *Handler, tt *testStruct, wantCached } } -func TestHandler_ServeValues(t *testing.T) { +func TestHandler_ServeTags(t *testing.T) { timeNow = func() time.Time { return time.Unix(1669714247, 0) } @@ -61,9 +62,165 @@ func TestHandler_ServeValues(t *testing.T) { cfg, _ := config.DefaultConfig() cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" h := NewTags(cfg) + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + + // Test 1: Get all tags without filters + srv.AddResponce( + "SELECT splitByChar('=', Tag1)[1] AS value FROM graphite_tagged WHERE "+ + "Date >= '"+fromDate+"' AND Date <= '"+untilDate+"' GROUP BY value ORDER BY value LIMIT 10000", + &chtest.TestResponse{ + Body: []byte("__name__\nenvironment\nproject\nhost\n"), + }) + + // Test 2: Get tags with prefix filter + srv.AddResponce( + "SELECT splitByChar('=', Tag1)[1] AS value FROM graphite_tagged WHERE "+ + "(Tag1 LIKE 'pr%') AND (Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000", + &chtest.TestResponse{ + Body: []byte("project\n"), + }) + + // Test 3: Get tags with expr filters + srv.AddResponce( + "SELECT splitByChar('=', arrayJoin(Tags))[1] AS value FROM graphite_tagged WHERE "+ + "(Tag1='environment=production') AND (Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10001", + &chtest.TestResponse{ + Body: []byte("__name__\nhost\nproject\n"), + }) + + // Test 4: Get tags with multiple expr filters + srv.AddResponce( + "SELECT splitByChar('=', arrayJoin(Tags))[1] AS value FROM graphite_tagged WHERE "+ + "((Tag1='environment=production') AND (has(Tags, 'project=web'))) AND (Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10002", + &chtest.TestResponse{ + Body: []byte("__name__\nhost\n"), + }) + + // Test 5: Get tags with prefix and expr filters + srv.AddResponce( + "SELECT splitByChar('=', arrayJoin(Tags))[1] AS value FROM graphite_tagged WHERE "+ + "((Tag1='environment=production') AND (arrayJoin(Tags) LIKE 'h%')) AND (Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10001", + &chtest.TestResponse{ + Body: []byte("host\n"), + }) + + tests := []testStruct{ + { + request: NewRequest("GET", srv.URL+"/tags/autoComplete/tags", nil), + wantCode: http.StatusOK, + want: `["environment","host","name","project"]`, + }, + { + request: NewRequest("GET", srv.URL+"/tags/autoComplete/tags?tagPrefix=pr", nil), + wantCode: http.StatusOK, + want: `["project"]`, + }, + { + request: NewRequest("GET", srv.URL+"/tags/autoComplete/tags?expr=environment%3Dproduction", nil), + wantCode: http.StatusOK, + want: `["host","name","project"]`, + }, + { + request: NewRequest("GET", srv.URL+"/tags/autoComplete/tags?expr=environment%3Dproduction&expr=project%3Dweb", nil), + wantCode: http.StatusOK, + want: `["host","name"]`, + }, + { + request: NewRequest("GET", srv.URL+"/tags/autoComplete/tags?expr=environment%3Dproduction&tagPrefix=h", nil), + wantCode: http.StatusOK, + want: `["host"]`, + }, + } + + for i, tt := range tests { + t.Run("Test#"+strconv.Itoa(i), func(t *testing.T) { + testResponce(t, i, h, &tt, "") + }) + } +} + +func TestHandler_ServeTagsWithCache(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + + // Enable cache + cfg.Common.FindCacheConfig = config.CacheConfig{ + Type: "mem", + Size: 8192, + FindTimeoutSec: 1, + } + + var err error + cfg.Common.FindCache, err = config.CreateCache("autocomplete", &cfg.Common.FindCacheConfig) + if err != nil { + t.Fatalf("Failed to create find cache: %v", err) + } + + h := NewTags(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + + srv.AddResponce( + "SELECT splitByChar('=', Tag1)[1] AS value FROM graphite_tagged WHERE "+ + "Date >= '"+fromDate+"' AND Date <= '"+untilDate+"' GROUP BY value ORDER BY value LIMIT 10000", + &chtest.TestResponse{ + Body: []byte("__name__\nenvironment\nproject\nhost\n"), + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/tags", nil), + wantCode: http.StatusOK, + want: `["environment","host","name","project"]`, + } + + // First request - should hit the database + testResponce(t, 0, h, &test, "") + assert.Equal(t, uint64(1), srv.Queries()) + + // Second request - should hit the cache + testResponce(t, 1, h, &test, "1") + assert.Equal(t, uint64(1), srv.Queries()) // No new queries + + // Wait for cache expiration + time.Sleep(time.Second * 2) + + // Third request - should hit the database again + testResponce(t, 2, h, &test, "") + assert.Equal(t, uint64(2), srv.Queries()) +} + +func TestHandler_ServeValues(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + + h := NewValues(cfg) + now := timeNow() until := strconv.FormatInt(now.Unix(), 10) from := strconv.FormatInt(now.Add(-time.Minute).Unix(), 10) @@ -101,7 +258,79 @@ func TestHandler_ServeValues(t *testing.T) { } } -func TestTagsAutocomplete_ServeValuesCached(t *testing.T) { +func TestHandler_ServeValuesWithValuePrefix(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + + h := NewValues(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + + // Test with valuePrefix + srv.AddResponce( + "SELECT substr(Tag1, 6) AS value FROM graphite_tagged WHERE "+ + "(Tag1 LIKE 'host=dc-%') AND (Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 100", + &chtest.TestResponse{ + Body: []byte("dc-host1\ndc-host2\ndc-host3\n"), + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=host&valuePrefix=dc-&limit=100", nil), + wantCode: http.StatusOK, + want: "[\"dc-host1\",\"dc-host2\",\"dc-host3\"]", + } + + testResponce(t, 0, h, &test, "") +} + +func TestHandler_ServeValuesNameTag(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + + h := NewValues(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + + // Test with name tag (which should be converted to __name__) + srv.AddResponce( + `SELECT substr(Tag1, 10) AS value FROM graphite_tagged WHERE `+ + `(Tag1 LIKE '\\_\\_name\\_\\_=metric.%') AND (Date >= '`+fromDate+`' AND Date <= '`+untilDate+`') GROUP BY value ORDER BY value LIMIT 10000`, + &chtest.TestResponse{ + Body: []byte("metric.cpu.usage\nmetric.memory.used\nmetric.disk.io\n"), + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=name&valuePrefix=metric.", nil), + wantCode: http.StatusOK, + want: "[\"metric.cpu.usage\",\"metric.memory.used\",\"metric.disk.io\"]", + } + + testResponce(t, 0, h, &test, "") +} + +func TestHandler_ServeValuesWithCache(t *testing.T) { timeNow = func() time.Time { return time.Unix(1669714247, 0) } @@ -113,8 +342,9 @@ func TestTagsAutocomplete_ServeValuesCached(t *testing.T) { cfg, _ := config.DefaultConfig() cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" - // find cache config + // Enable cache cfg.Common.FindCacheConfig = config.CacheConfig{ Type: "mem", Size: 8192, @@ -122,54 +352,545 @@ func TestTagsAutocomplete_ServeValuesCached(t *testing.T) { } var err error + cfg.Common.FindCache, err = config.CreateCache("autocomplete", &cfg.Common.FindCacheConfig) + if err != nil { + t.Fatalf("Failed to create find cache: %v", err) + } + + h := NewValues(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + + srv.AddResponce( + "SELECT substr(Tag1, 6) AS value FROM graphite_tagged WHERE "+ + "(Tag1 LIKE 'host=%') AND (Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000", + &chtest.TestResponse{ + Body: []byte("host1\nhost2\nhost3\n"), + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=host", nil), + wantCode: http.StatusOK, + want: "[\"host1\",\"host2\",\"host3\"]", + } + + // First request - should hit the database + testResponce(t, 0, h, &test, "") + assert.Equal(t, uint64(1), srv.Queries()) + + // Second request - should hit the cache + testResponce(t, 1, h, &test, "1") + assert.Equal(t, uint64(1), srv.Queries()) // No new queries + + // Wait for cache expiration + time.Sleep(time.Second * 2) + + // Third request - should hit the database again + testResponce(t, 2, h, &test, "") + assert.Equal(t, uint64(2), srv.Queries()) +} + +func TestHandler_ServeValuesWithCacheAndExpr(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + + // Enable cache + cfg.Common.FindCacheConfig = config.CacheConfig{ + Type: "mem", + Size: 8192, + FindTimeoutSec: 1, + } + + var err error + cfg.Common.FindCache, err = config.CreateCache("autocomplete", &cfg.Common.FindCacheConfig) + if err != nil { + t.Fatalf("Failed to create find cache: %v", err) + } + + h := NewValues(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + + srv.AddResponce( + "SELECT substr(arrayFilter(x -> x LIKE 'host=%', Tags)[1], 6) AS value FROM graphite_tagged WHERE "+ + "((Tag1='environment=production') AND (arrayExists(x -> x LIKE 'host=%', Tags))) AND (Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000", + &chtest.TestResponse{ + Body: []byte("prod-host1\nprod-host2\n"), + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=host&expr=environment%3Dproduction", nil), + wantCode: http.StatusOK, + want: "[\"prod-host1\",\"prod-host2\"]", + } + + // First request - should hit the database + testResponce(t, 0, h, &test, "") + assert.Equal(t, uint64(1), srv.Queries()) + + // Second request - should hit the cache + testResponce(t, 1, h, &test, "1") + assert.Equal(t, uint64(1), srv.Queries()) // No new queries + + // Test with different valuePrefix - should not hit cache + test2 := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=host&expr=environment%3Dproduction&valuePrefix=prod-host1", nil), + wantCode: http.StatusOK, + want: "[\"prod-host1\"]", + } + + srv.AddResponce( + "SELECT substr(arrayFilter(x -> x LIKE 'host=prod-host1%', Tags)[1], 6) AS value FROM graphite_tagged WHERE "+ + "((Tag1='environment=production') AND (arrayExists(x -> x LIKE 'host=prod-host1%', Tags))) AND (Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000", + &chtest.TestResponse{ + Body: []byte("prod-host1\n"), + }) + + // Should hit the database because valuePrefix is different + testResponce(t, 2, h, &test2, "") + assert.Equal(t, uint64(2), srv.Queries()) +} + +func TestHandler_ServeValuesWithInvalidLimit(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + + h := NewValues(cfg) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=host&limit=invalid", nil), + wantCode: http.StatusBadRequest, + want: "", // Error response + } + + testResponce(t, 0, h, &test, "") +} + +func TestHandler_ServeValuesWithMultipleExpr(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + + h := NewValues(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + + // Test with multiple expressions and valuePrefix + srv.AddResponce( + "SELECT substr(arrayFilter(x -> x LIKE 'host=dc-%', Tags)[1], 6) AS value FROM graphite_tagged WHERE "+ + "(((Tag1='environment=production') AND (has(Tags, 'project=web'))) AND (arrayExists(x -> x LIKE 'host=dc-%', Tags))) AND "+ + "(Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000", + &chtest.TestResponse{ + Body: []byte("dc-host1\ndc-host2\n"), + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=host&expr=environment%3Dproduction&expr=project%3Dweb&valuePrefix=dc-", nil), + wantCode: http.StatusOK, + want: "[\"dc-host1\",\"dc-host2\"]", + } + + testResponce(t, 0, h, &test, "") +} +func TestHandler_ServeValuesNoCache(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + + // Enable cache + cfg.Common.FindCacheConfig = config.CacheConfig{ + Type: "mem", + Size: 8192, + FindTimeoutSec: 60, + } + + var err error cfg.Common.FindCache, err = config.CreateCache("autocomplete", &cfg.Common.FindCacheConfig) if err != nil { t.Fatalf("Failed to create find cache: %v", err) } + h := NewValues(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + + srv.AddResponce( + "SELECT substr(Tag1, 6) AS value FROM graphite_tagged WHERE "+ + "(Tag1 LIKE 'host=%') AND (Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000", + &chtest.TestResponse{ + Body: []byte("host1\nhost2\n"), + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=host&noCache=true", nil), + wantCode: http.StatusOK, + want: "[\"host1\",\"host2\"]", + } + + // First request with noCache=true - should always hit the database + testResponce(t, 0, h, &test, "") + assert.Equal(t, uint64(1), srv.Queries()) + + // Second request with noCache=true - should hit the database again + testResponce(t, 1, h, &test, "") + assert.Equal(t, uint64(2), srv.Queries()) // Should increase +} + +func TestHandler_ServeValuesEmptyResult(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + + h := NewValues(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + + // Test empty result + srv.AddResponce( + "SELECT substr(Tag1, 13) AS value FROM graphite_tagged WHERE "+ + "(Tag1 LIKE 'nonexistent=%') AND (Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000", + &chtest.TestResponse{ + Body: []byte(""), // Empty response + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=nonexistent", nil), + wantCode: http.StatusOK, + want: "null", // Empty array + } + + testResponce(t, 0, h, &test, "") +} + +func TestHandler_ServeTagsWithCostOptimization(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + cfg.ClickHouse.TagsCountTable = "tag1_count_per_day" + h := NewTags(cfg) now := timeNow() - until := strconv.FormatInt(now.Unix(), 10) - from := strconv.FormatInt(now.Add(-time.Minute).Unix(), 10) fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + from := now.AddDate(0, 0, -h.config.ClickHouse.TaggedAutocompleDays).Unix() + until := now.Unix() + // Test case: Tags query with multiple expressions should use cost optimization + // First response: tags count query to get costs srv.AddResponce( - "SELECT substr(arrayFilter(x -> x LIKE 'host=%', Tags)[1], 6) AS value FROM graphite_tagged WHERE (((Tag1='environment=production') AND (has(Tags, 'project=web'))) AND (arrayExists(x -> x LIKE 'host=%', Tags))) AND "+ + "SELECT Tag1, sum(Count) as cnt FROM tag1_count_per_day WHERE "+ + "((Tag1='environment=production') OR (Tag1='project=web')) AND "+ + "(Date >= '"+date.FromTimestampToDaysFormat(from)+"' AND Date <= '"+date.UntilTimestampToDaysFormat(until)+"') GROUP BY Tag1 FORMAT TabSeparatedRaw", + &chtest.TestResponse{ + Body: []byte("environment=production\t10000\nproject=web\t500\n"), + }) + + // Second response: main tags query (should be ordered based on costs) + srv.AddResponce( + "SELECT splitByChar('=', arrayJoin(Tags))[1] AS value FROM graphite_tagged WHERE "+ + "((Tag1='project=web') AND (has(Tags, 'environment=production'))) AND "+ // Lower cost term first + "(Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10002", + &chtest.TestResponse{ + Body: []byte("__name__\nhost\nregion\n"), + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/tags?expr=environment%3Dproduction&expr=project%3Dweb", nil), + wantCode: http.StatusOK, + want: `["host","name","region"]`, + } + + testResponce(t, 0, h, &test, "") + assert.Equal(t, uint64(2), srv.Queries()) // Should have 2 queries: cost query + main query +} + +func TestHandler_ServeValuesWithCostOptimization(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + cfg.ClickHouse.TagsCountTable = "tag1_count_per_day" + + h := NewValues(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + from := now.AddDate(0, 0, -h.config.ClickHouse.TaggedAutocompleDays).Unix() + until := now.Unix() + + // First response: tags count query for cost optimization + srv.AddResponce( + "SELECT Tag1, sum(Count) as cnt FROM tag1_count_per_day WHERE "+ + "((Tag1='environment=production') OR (Tag1='datacenter=us-east')) AND "+ + "(Date >= '"+date.FromTimestampToDaysFormat(from)+"' AND Date <= '"+date.UntilTimestampToDaysFormat(until)+"') GROUP BY Tag1 FORMAT TabSeparatedRaw", + &chtest.TestResponse{ + Body: []byte("environment=production\t5000\ndatacenter=us-east\t100\n"), + }) + + // Second response: values query (should use optimized order) + srv.AddResponce( + "SELECT substr(arrayFilter(x -> x LIKE 'host=%', Tags)[1], 6) AS value FROM graphite_tagged WHERE "+ + "(((Tag1='datacenter=us-east') AND (has(Tags, 'environment=production'))) AND "+ // Lower cost first + "(arrayExists(x -> x LIKE 'host=%', Tags))) AND "+ "(Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000", &chtest.TestResponse{ - Body: []byte("host1\nhost2\ndc-host2\ndc-host3\n"), + Body: []byte("host1\nhost2\nhost3\n"), }) - tests := []testStruct{ - { - request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?"+ - "expr=environment%3Dproduction"+"&"+"expr=project%3Dweb"+"&"+"tag=host"+ - "&limit=10000&from="+from+"&until="+until, nil), - wantCode: http.StatusOK, - want: "[\"host1\",\"host2\",\"dc-host2\",\"dc-host3\"]", - wantContent: "text/plain; charset=utf-8", - }, + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=host&expr=environment%3Dproduction&expr=datacenter%3Dus-east", nil), + wantCode: http.StatusOK, + want: "[\"host1\",\"host2\",\"host3\"]", } - var queries uint64 + testResponce(t, 0, h, &test, "") + assert.Equal(t, uint64(2), srv.Queries()) +} - for i, tt := range tests { - t.Run(tt.request.URL.RawQuery+"#"+strconv.Itoa(i), func(t *testing.T) { - testResponce(t, 0, h, &tt, "") - assert.Equal(t, uint64(1), srv.Queries()-queries) +func TestHandler_ServeTagsWithWildcardExpressions(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } - // query from cache - testResponce(t, 1, h, &tt, "1") - assert.Equal(t, uint64(1), srv.Queries()-queries) + metrics.DisableMetrics() - // wait for expire cache - time.Sleep(time.Second * 3) - testResponce(t, 2, h, &tt, "") + srv := chtest.NewTestServer() + defer srv.Close() - assert.Equal(t, uint64(2), srv.Queries()-queries) - queries = srv.Queries() + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + cfg.ClickHouse.TagsCountTable = "tag1_count_per_day" + + h := NewTags(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + + // Test with wildcard expressions (should not query tags count table) + srv.AddResponce( + "SELECT splitByChar('=', arrayJoin(Tags))[1] AS value FROM graphite_tagged WHERE "+ + "(Tag1 LIKE 'environment=prod%') AND (Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10001", + &chtest.TestResponse{ + Body: []byte("__name__\nhost\nproject\n"), }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/tags?expr=environment%3Dprod*", nil), + wantCode: http.StatusOK, + want: `["host","name","project"]`, } + + testResponce(t, 0, h, &test, "") + assert.Equal(t, uint64(1), srv.Queries()) // Only 1 query since wildcards skip cost optimization +} + +func TestHandler_ServeValuesWithNoEqualityTerms(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + cfg.ClickHouse.TagsCountTable = "tag1_count_per_day" + + h := NewValues(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + + // Test with != operator (should not use tags count table) + srv.AddResponce( + "SELECT substr(arrayFilter(x -> x LIKE 'host=%', Tags)[1], 6) AS value FROM graphite_tagged WHERE "+ + "((NOT arrayExists((x) -> x='environment=development', Tags)) AND (arrayExists(x -> x LIKE 'host=%', Tags))) AND "+ + "(Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000", + &chtest.TestResponse{ + Body: []byte("host1\nhost2\n"), + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=host&expr=environment%21%3Ddevelopment", nil), + wantCode: http.StatusOK, + want: "[\"host1\",\"host2\"]", + } + + testResponce(t, 0, h, &test, "") + assert.Equal(t, uint64(1), srv.Queries()) // Only 1 query since no equality terms +} + +func TestHandler_ServeTagsWithHighCostTags(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + cfg.ClickHouse.TagsCountTable = "tag1_count_per_day" + + h := NewTags(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + from := now.AddDate(0, 0, -h.config.ClickHouse.TaggedAutocompleDays).Unix() + until := now.Unix() + + // Test with high cardinality tags - tags should be reordered based on cost + srv.AddResponce( + "SELECT Tag1, sum(Count) as cnt FROM tag1_count_per_day WHERE "+ + "(((Tag1='__name__=high.cost.metric') OR (Tag1='environment=production')) OR (Tag1='dc=west')) AND "+ + "(Date >= '"+date.FromTimestampToDaysFormat(from)+"' AND Date <= '"+date.UntilTimestampToDaysFormat(until)+"') GROUP BY Tag1 FORMAT TabSeparatedRaw", + &chtest.TestResponse{ + Body: []byte("__name__=high.cost.metric\t1000000\nenvironment=production\t10000\ndc=west\t50\n"), + }) + + // Query should use lowest cost tag first (dc=west) + srv.AddResponce( + "SELECT splitByChar('=', arrayJoin(Tags))[1] AS value FROM graphite_tagged WHERE "+ + "(((Tag1='dc=west') AND (has(Tags, 'environment=production'))) AND (has(Tags, '__name__=high.cost.metric'))) AND "+ + "(Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10003", + &chtest.TestResponse{ + Body: []byte("host\nproject\nregion\n"), + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/tags?expr=name%3Dhigh.cost.metric&expr=environment%3Dproduction&expr=dc%3Dwest", nil), + wantCode: http.StatusOK, + want: `["host","project","region"]`, + } + + testResponce(t, 0, h, &test, "") + assert.Equal(t, uint64(2), srv.Queries()) +} + +func TestHandler_ServeValuesWithMixedOperators(t *testing.T) { + timeNow = func() time.Time { + return time.Unix(1669714247, 0) + } + + metrics.DisableMetrics() + + srv := chtest.NewTestServer() + defer srv.Close() + + cfg, _ := config.DefaultConfig() + cfg.ClickHouse.URL = srv.URL + cfg.ClickHouse.TaggedTable = "graphite_tagged" + cfg.ClickHouse.TagsCountTable = "tag1_count_per_day" + + h := NewValues(cfg) + + now := timeNow() + fromDate, untilDate := dateString(h.config.ClickHouse.TaggedAutocompleDays, now) + from := now.AddDate(0, 0, -h.config.ClickHouse.TaggedAutocompleDays).Unix() + until := now.Unix() + + // Test with mixed operators (only equality operators should be in cost query) + srv.AddResponce( + "SELECT Tag1, sum(Count) as cnt FROM tag1_count_per_day WHERE "+ + "((Tag1='environment=production') OR (Tag1='project=api')) AND "+ + "(Date >= '"+date.FromTimestampToDaysFormat(from)+"' AND Date <= '"+date.UntilTimestampToDaysFormat(until)+"') GROUP BY Tag1 FORMAT TabSeparatedRaw", + &chtest.TestResponse{ + Body: []byte("environment=production\t8000\nproject=api\t200\n"), + }) + + // Main query should include != operator but order by cost + srv.AddResponce( + "SELECT substr(arrayFilter(x -> x LIKE 'host=%', Tags)[1], 6) AS value FROM graphite_tagged WHERE "+ + "((((Tag1='project=api') AND (has(Tags, 'environment=production'))) AND (NOT arrayExists((x) -> x='dc=east', Tags))) AND "+ + "(arrayExists(x -> x LIKE 'host=%', Tags))) AND "+ + "(Date >= '"+fromDate+"' AND Date <= '"+untilDate+"') GROUP BY value ORDER BY value LIMIT 10000", + &chtest.TestResponse{ + Body: []byte("host1\nhost2\n"), + }) + + test := testStruct{ + request: NewRequest("GET", srv.URL+"/tags/autoComplete/values?tag=host&expr=environment%3Dproduction&expr=project%3Dapi&expr=dc%21%3Deast", nil), + wantCode: http.StatusOK, + want: "[\"host1\",\"host2\"]", + } + + testResponce(t, 0, h, &test, "") + assert.Equal(t, uint64(2), srv.Queries()) // Cost query only for equality terms } From b7086616cf739b0e1c8959722b416229e4544b70 Mon Sep 17 00:00:00 2001 From: Artyom Antonov Date: Thu, 17 Jul 2025 20:19:49 +0500 Subject: [PATCH 5/5] run linter --- autocomplete/autocomplete.go | 2 +- autocomplete/autocomplete_test.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/autocomplete/autocomplete.go b/autocomplete/autocomplete.go index 77a1ccc3..57620c0f 100644 --- a/autocomplete/autocomplete.go +++ b/autocomplete/autocomplete.go @@ -116,12 +116,12 @@ func (h *Handler) requestExpr(r *http.Request, tcq *finder.TagCountQuerier, from if err != nil { return wr, pw, usedTags, err } + if tagValuesCosts != nil { finder.SetCosts(terms, tagValuesCosts) } else if len(h.config.ClickHouse.TaggedCosts) != 0 { finder.SetCosts(terms, h.config.ClickHouse.TaggedCosts) } - } finder.SortTaggedTermsByCost(terms) diff --git a/autocomplete/autocomplete_test.go b/autocomplete/autocomplete_test.go index 91da2fb4..a5979ff4 100644 --- a/autocomplete/autocomplete_test.go +++ b/autocomplete/autocomplete_test.go @@ -166,6 +166,7 @@ func TestHandler_ServeTagsWithCache(t *testing.T) { } var err error + cfg.Common.FindCache, err = config.CreateCache("autocomplete", &cfg.Common.FindCacheConfig) if err != nil { t.Fatalf("Failed to create find cache: %v", err) @@ -352,6 +353,7 @@ func TestHandler_ServeValuesWithCache(t *testing.T) { } var err error + cfg.Common.FindCache, err = config.CreateCache("autocomplete", &cfg.Common.FindCacheConfig) if err != nil { t.Fatalf("Failed to create find cache: %v", err) @@ -413,6 +415,7 @@ func TestHandler_ServeValuesWithCacheAndExpr(t *testing.T) { } var err error + cfg.Common.FindCache, err = config.CreateCache("autocomplete", &cfg.Common.FindCacheConfig) if err != nil { t.Fatalf("Failed to create find cache: %v", err) @@ -547,6 +550,7 @@ func TestHandler_ServeValuesNoCache(t *testing.T) { } var err error + cfg.Common.FindCache, err = config.CreateCache("autocomplete", &cfg.Common.FindCacheConfig) if err != nil { t.Fatalf("Failed to create find cache: %v", err)