Skip to content

Commit 0ecb4fb

Browse files
authored
chore: improve metrics.go stats logging to include bloom filter time as well as fix missing size info in query-frontend (#20093)
1 parent 2d54ddc commit 0ecb4fb

File tree

10 files changed

+243
-126
lines changed

10 files changed

+243
-126
lines changed

pkg/indexgateway/gateway.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,9 @@ func (g *Gateway) GetChunkRef(ctx context.Context, req *logproto.GetChunkRefRequ
226226
}
227227

228228
predicate := chunk.NewPredicate(matchers, &req.Plan)
229+
chunkRefsLookupStart := time.Now()
229230
chunks, _, err := g.indexQuerier.GetChunks(ctx, instanceID, req.From, req.Through, predicate, nil)
231+
chunkRefsLookupDuration := time.Since(chunkRefsLookupStart)
230232
if err != nil {
231233
return nil, err
232234
}
@@ -243,6 +245,7 @@ func (g *Gateway) GetChunkRef(ctx context.Context, req *logproto.GetChunkRefRequ
243245
initialChunkCount := len(result.Refs)
244246
result.Stats.TotalChunks = int64(initialChunkCount)
245247
result.Stats.PostFilterChunks = int64(initialChunkCount) // populate early for error reponses
248+
result.Stats.ChunkRefsLookupTime = chunkRefsLookupDuration.Seconds()
246249

247250
defer func() {
248251
if err == nil {
@@ -278,17 +281,19 @@ func (g *Gateway) GetChunkRef(ctx context.Context, req *logproto.GetChunkRefRequ
278281

279282
start = time.Now()
280283
chunkRefs, used, err := g.bloomQuerier.FilterChunkRefs(ctx, instanceID, req.From, req.Through, seriesMap, result.Refs, req.Plan)
284+
bloomFilterDuration := time.Since(start)
281285
if err != nil {
282286
return nil, err
283287
}
284288
sp.AddEvent("bloomQuerier.FilterChunkRefs", trace.WithAttributes(
285-
attribute.String("duration", time.Since(start).String()),
289+
attribute.String("duration", bloomFilterDuration.String()),
286290
))
287291

288292
result.Refs = chunkRefs
289293
level.Info(logger).Log("msg", "return filtered chunk refs", "unfiltered", initialChunkCount, "filtered", len(result.Refs), "used_blooms", used)
290294
result.Stats.PostFilterChunks = int64(len(result.Refs))
291295
result.Stats.UsedBloomFilters = used
296+
result.Stats.BloomFilterTime = bloomFilterDuration.Seconds()
292297
return result, nil
293298
}
294299

pkg/logql/metrics.go

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -115,13 +115,12 @@ func RecordRangeAndInstantQueryMetrics(
115115
result promql_parser.Value,
116116
) {
117117
var (
118-
logger = fixLogger(ctx, log)
119-
rangeType = GetRangeType(p)
120-
rt = string(rangeType)
121-
latencyType = latencyTypeFast
122-
returnedLines = 0
123-
cardinalityEstimate = uint64(0)
124-
queryTags, _ = ctx.Value(httpreq.QueryTagsHTTPHeader).(string) // it's ok to be empty.
118+
logger = fixLogger(ctx, log)
119+
rangeType = GetRangeType(p)
120+
rt = string(rangeType)
121+
latencyType = latencyTypeFast
122+
returnedLines = 0
123+
queryTags, _ = ctx.Value(httpreq.QueryTagsHTTPHeader).(string) // it's ok to be empty.
125124
)
126125

127126
queryType, err := QueryType(p.GetExpression())
@@ -162,10 +161,6 @@ func RecordRangeAndInstantQueryMetrics(
162161
bloomRatio = float64(stats.Index.TotalChunks-stats.Index.PostFilterChunks) / float64(stats.Index.TotalChunks)
163162
}
164163

165-
if r, ok := result.(CountMinSketchVector); ok {
166-
cardinalityEstimate = r.F.HyperLogLog.Estimate()
167-
}
168-
169164
logValues = append(logValues, []interface{}{
170165
"latency", latencyType, // this can be used to filter log lines.
171166
"query", query,
@@ -213,8 +208,6 @@ func RecordRangeAndInstantQueryMetrics(
213208
"cache_result_hit", resultCache.EntriesFound,
214209
"cache_result_download_time", resultCache.CacheDownloadTime(),
215210
"cache_result_query_length_served", resultCache.CacheQueryLengthServed(),
216-
// Cardinality estimate for some approximate query types
217-
"cardinality_estimate", cardinalityEstimate,
218211
// The total of chunk reference fetched from index.
219212
"ingester_chunk_refs", stats.Ingester.Store.GetTotalChunksRef(),
220213
// Total number of chunks fetched.
@@ -240,8 +233,15 @@ func RecordRangeAndInstantQueryMetrics(
240233
"index_bloom_filter_ratio", fmt.Sprintf("%.2f", bloomRatio),
241234
"index_used_bloom_filters", stats.Index.UsedBloomFilters,
242235
"index_shard_resolver_duration", time.Duration(stats.Index.ShardsDuration),
236+
"index_bloom_filter_time", logql_stats.ConvertSecondsToNanoseconds(stats.Index.BloomFilterTime),
237+
"index_chunk_refs_lookup_time", logql_stats.ConvertSecondsToNanoseconds(stats.Index.ChunkRefsLookupTime),
243238
}...)
244239

240+
if r, ok := result.(CountMinSketchVector); ok {
241+
cardinalityEstimate := r.F.HyperLogLog.Estimate()
242+
logValues = append(logValues, "cardinality_estimate", cardinalityEstimate)
243+
}
244+
245245
logValues = append(logValues, httpreq.TagsToKeyValues(queryTags)...)
246246

247247
if httpreq.ExtractHeader(ctx, httpreq.LokiDisablePipelineWrappersHeader) == "true" {

pkg/logqlmodel/stats/context.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,8 @@ func (i *Index) Merge(m Index) {
295295
i.PostFilterChunks += m.PostFilterChunks
296296
i.ShardsDuration += m.ShardsDuration
297297
i.TotalStreams += m.TotalStreams
298+
i.ChunkRefsLookupTime += m.ChunkRefsLookupTime
299+
i.BloomFilterTime += m.BloomFilterTime
298300
if m.UsedBloomFilters {
299301
i.UsedBloomFilters = m.UsedBloomFilters
300302
}

pkg/logqlmodel/stats/stats.pb.go

Lines changed: 185 additions & 111 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/logqlmodel/stats/stats.proto

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,14 @@ message Index {
114114
bool usedBloomFilters = 4 [(gogoproto.jsontag) = "usedBloomFilters"];
115115
// Total unique streams matched
116116
int64 totalStreams = 5 [(gogoproto.jsontag) = "totalStreams"];
117+
// Time spent fetching chunk refs from index in seconds.
118+
// In addition to internal calculations this is also returned by the HTTP API.
119+
// Grafana expects time values to be returned in seconds as float.
120+
double chunkRefsLookupTime = 6 [(gogoproto.jsontag) = "chunkRefsLookupTime"];
121+
// Time spent filtering chunks with bloom gateway in seconds.
122+
// In addition to internal calculations this is also returned by the HTTP API.
123+
// Grafana expects time values to be returned in seconds as float.
124+
double bloomFilterTime = 7 [(gogoproto.jsontag) = "bloomFilterTime"];
117125
}
118126

119127
message Querier {

pkg/querier/queryrange/codec_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1994,6 +1994,8 @@ var (
19941994
}
19951995
},
19961996
"index": {
1997+
"bloomFilterTime": 0,
1998+
"chunkRefsLookupTime": 0,
19971999
"postFilterChunks": 0,
19982000
"totalChunks": 0,
19992001
"totalStreams": 0,

pkg/querier/queryrange/prometheus_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import (
1414

1515
var emptyStats = `"stats": {
1616
"index": {
17+
"bloomFilterTime": 0,
18+
"chunkRefsLookupTime": 0,
1719
"postFilterChunks": 0,
1820
"totalChunks": 0,
1921
"totalStreams": 0,

pkg/querier/queryrange/stats.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@ import (
1212
"github.com/go-kit/log"
1313
"github.com/go-kit/log/level"
1414
"github.com/grafana/dskit/middleware"
15+
"github.com/prometheus/prometheus/promql"
1516
promql_parser "github.com/prometheus/prometheus/promql/parser"
1617

18+
"github.com/grafana/loki/v3/pkg/loghttp"
1719
"github.com/grafana/loki/v3/pkg/logproto"
1820

1921
"github.com/grafana/loki/v3/pkg/logql"
@@ -146,12 +148,30 @@ func StatsCollectorMiddleware() queryrangebase.Middleware {
146148
switch r := resp.(type) {
147149
case *LokiResponse:
148150
responseStats = &r.Statistics
149-
totalEntries = int(logqlmodel.Streams(r.Data.Result).Lines())
151+
res = logqlmodel.Streams(r.Data.Result)
152+
totalEntries = int(res.(logqlmodel.Streams).Lines())
150153
queryType = queryTypeLog
151154
case *LokiPromResponse:
152155
responseStats = &r.Statistics
156+
153157
if r.Response != nil {
154158
totalEntries = len(r.Response.Data.Result)
159+
// Convert the response to promql_parser.Value for stats calculation
160+
switch r.Response.Data.ResultType {
161+
case loghttp.ResultTypeVector:
162+
res = sampleStreamToVector(r.Response.Data.Result)
163+
case loghttp.ResultTypeMatrix:
164+
res = sampleStreamToMatrix(r.Response.Data.Result)
165+
case loghttp.ResultTypeScalar:
166+
// Scalar is represented as a single SampleStream with one sample
167+
if len(r.Response.Data.Result) > 0 && len(r.Response.Data.Result[0].Samples) > 0 {
168+
sample := r.Response.Data.Result[0].Samples[0]
169+
res = promql.Scalar{
170+
T: sample.TimestampMs,
171+
V: sample.Value,
172+
}
173+
}
174+
}
155175
}
156176

157177
queryType = queryTypeMetric

pkg/util/marshal/legacy/marshal_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ var queryTests = []struct {
5757
],
5858
"stats" : {
5959
"index": {
60+
"bloomFilterTime": 0,
61+
"chunkRefsLookupTime": 0,
6062
"postFilterChunks": 0,
6163
"totalChunks": 0,
6264
"totalStreams": 0,

pkg/util/marshal/marshal_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ import (
2828

2929
const emptyStats = `{
3030
"index": {
31+
"bloomFilterTime": 0,
32+
"chunkRefsLookupTime": 0,
3133
"postFilterChunks": 0,
3234
"totalChunks": 0,
3335
"totalStreams": 0,

0 commit comments

Comments
 (0)