Skip to content

Commit 24e9230

Browse files
committed
feat: Support configuring rule.Reason as a Sprintf format string
This allows a single templated Reason to generate many Reasons, e.g. Reasons for Nvidia GPU Xid errors in the dmesg log. Closes #1067
1 parent 9e366f5 commit 24e9230

File tree

2 files changed

+172
-4
lines changed

2 files changed

+172
-4
lines changed

pkg/systemlogmonitor/log_monitor.go

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ import (
2020
"encoding/json"
2121
"fmt"
2222
"os"
23+
"regexp"
24+
"strings"
2325
"time"
2426

2527
"k8s.io/klog/v2"
@@ -157,6 +159,9 @@ func (l *logMonitor) parseLog(log *systemlogtypes.Log) {
157159
continue
158160
}
159161
status := l.generateStatus(matched, rule)
162+
if status == nil {
163+
continue
164+
}
160165
klog.Infof("New status generated: %+v", status)
161166
l.output <- status
162167
}
@@ -169,12 +174,31 @@ func (l *logMonitor) generateStatus(logs []*systemlogtypes.Log, rule systemlogty
169174
message := generateMessage(logs, rule.PatternGeneratedMessageSuffix)
170175
var events []types.Event
171176
var changedConditions []*types.Condition
177+
178+
// Support configuring rule.Reason as a Sprintf format string and formatting it with the matched capturing groups in rule.Pattern.
179+
re := regexp.MustCompile(rule.Pattern)
180+
matches := re.FindStringSubmatch(message)
181+
reason := rule.Reason
182+
formatArgs := make([]interface{}, 0)
183+
if len(matches) > 1 {
184+
// Use the matched capturing groups as the arguments for Sprintf.
185+
for _, value := range matches[1:] {
186+
formatArgs = append(formatArgs, value)
187+
}
188+
}
189+
reason = fmt.Sprintf(rule.Reason, formatArgs...)
190+
// If fmt.Sprintf fails, it will add "%!" for each failed template in the result string.
191+
if strings.Contains(reason, "%!") {
192+
klog.Errorf("Got wrong string %q for reason %q with pattern %q", reason, rule.Reason, rule.Pattern)
193+
return nil
194+
}
195+
172196
if rule.Type == types.Temp {
173197
// For temporary error only generate event
174198
events = append(events, types.Event{
175199
Severity: types.Warn,
176200
Timestamp: timestamp,
177-
Reason: rule.Reason,
201+
Reason: reason,
178202
Message: message,
179203
})
180204
} else {
@@ -185,19 +209,19 @@ func (l *logMonitor) generateStatus(logs []*systemlogtypes.Log, rule systemlogty
185209
// Update transition timestamp and message when the condition
186210
// changes. Condition is considered to be changed only when
187211
// status or reason changes.
188-
if condition.Status == types.False || condition.Reason != rule.Reason {
212+
if condition.Status == types.False || condition.Reason != reason {
189213
condition.Transition = timestamp
190214
condition.Message = message
191215
events = append(events, util.GenerateConditionChangeEvent(
192216
condition.Type,
193217
types.True,
194-
rule.Reason,
218+
reason,
195219
message,
196220
timestamp,
197221
))
198222
}
199223
condition.Status = types.True
200-
condition.Reason = rule.Reason
224+
condition.Reason = reason
201225
changedConditions = append(changedConditions, condition)
202226
break
203227
}

pkg/systemlogmonitor/log_monitor_test.go

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,150 @@ func TestGenerateStatusForMetrics(t *testing.T) {
408408
}
409409
}
410410

411+
// TestGenerateStatusForEvents is a table-driven test of logMonitor.generateStatus
// for temporary (types.Temp) rules. Each case pairs a rule (a Reason, optionally
// containing Sprintf verbs, plus a Pattern) with a single log line and the Status
// expected back. A nil expectation means generateStatus should produce no status
// because the Reason's verbs cannot all be filled from the Pattern's capturing groups.
func TestGenerateStatusForEvents(t *testing.T) {
412+
for c, test := range []struct {
413+
name string
414+
rule logtypes.Rule
415+
expected *types.Status
416+
logs []*logtypes.Log
417+
}{
418+
// Plain Reason and a Pattern without capturing groups: the Reason is expected unchanged.
{
419+
name: "without matching group",
420+
rule: logtypes.Rule{
421+
Type: types.Temp,
422+
Reason: "NvidiaGPUXid",
423+
Pattern: "NVRM: Xid \\(PCI:[^)]+\\): \\d+,.*",
424+
},
425+
logs: []*logtypes.Log{
426+
{
427+
Timestamp: time.Unix(1000, 1000),
428+
Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010",
429+
},
430+
},
431+
expected: &types.Status{
432+
Source: testSource,
433+
Events: []types.Event{{
434+
Severity: types.Warn,
435+
Timestamp: time.Unix(1000, 1000),
436+
Reason: "NvidiaGPUXid",
437+
Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010",
438+
}},
439+
},
440+
},
441+
// One %s verb and one capturing group: the captured "45" is substituted into the Reason.
{
442+
name: "one matching group",
443+
rule: logtypes.Rule{
444+
Type: types.Temp,
445+
Reason: "NvidiaGPUXid%s",
446+
Pattern: "NVRM: Xid \\(PCI:[^)]+\\): (\\d+),.*",
447+
},
448+
logs: []*logtypes.Log{
449+
{
450+
Timestamp: time.Unix(1000, 1000),
451+
Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010",
452+
},
453+
},
454+
expected: &types.Status{
455+
Source: testSource,
456+
Events: []types.Event{{
457+
Severity: types.Warn,
458+
Timestamp: time.Unix(1000, 1000),
459+
Reason: "NvidiaGPUXid45",
460+
Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010",
461+
}},
462+
},
463+
},
464+
// Two %s verbs and two capturing groups: the groups fill the verbs in order.
{
465+
name: "two matching groups",
466+
rule: logtypes.Rule{
467+
Type: types.Temp,
468+
Reason: "NvidiaGPUXid%s, Ch%s",
469+
Pattern: "NVRM: Xid \\(PCI:[^)]+\\): (\\d+), Ch (\\d+).*",
470+
},
471+
logs: []*logtypes.Log{
472+
{
473+
Timestamp: time.Unix(1000, 1000),
474+
Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010",
475+
},
476+
},
477+
expected: &types.Status{
478+
Source: testSource,
479+
Events: []types.Event{{
480+
Severity: types.Warn,
481+
Timestamp: time.Unix(1000, 1000),
482+
Reason: "NvidiaGPUXid45, Ch00000010",
483+
Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010",
484+
}},
485+
},
486+
},
487+
// Two verbs but only one capturing group: no status is expected.
{
488+
name: "not enough matching groups 1",
489+
rule: logtypes.Rule{
490+
Type: types.Temp,
491+
Reason: "NvidiaGPUXid%s, Ch%s",
492+
Pattern: "NVRM: Xid \\(PCI:[^)]+\\): (\\d+),.*",
493+
},
494+
logs: []*logtypes.Log{
495+
{
496+
Timestamp: time.Unix(1000, 1000),
497+
Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010",
498+
},
499+
},
500+
expected: nil,
501+
},
502+
// One verb but no capturing group at all: no status is expected.
{
503+
name: "not enough matching groups 2",
504+
rule: logtypes.Rule{
505+
Type: types.Temp,
506+
Reason: "NvidiaGPUXid%s",
507+
Pattern: "NVRM: Xid \\(PCI:[^)]+\\): \\d+,.*",
508+
},
509+
logs: []*logtypes.Log{
510+
{
511+
Timestamp: time.Unix(1000, 1000),
512+
Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010",
513+
},
514+
},
515+
expected: nil,
516+
},
517+
// Explicit argument indexes (%[1]s, %[2]s) refer to the capturing groups by position.
{
518+
name: "indexed matching groups",
519+
rule: logtypes.Rule{
520+
Type: types.Temp,
521+
Reason: "NvidiaGPUXid%[1]s, Ch%[2]s",
522+
Pattern: "NVRM: Xid \\(PCI:[^)]+\\): (\\d+), Ch (\\d+).*",
523+
},
524+
logs: []*logtypes.Log{
525+
{
526+
Timestamp: time.Unix(1000, 1000),
527+
Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010",
528+
},
529+
},
530+
expected: &types.Status{
531+
Source: testSource,
532+
Events: []types.Event{{
533+
Severity: types.Warn,
534+
Timestamp: time.Unix(1000, 1000),
535+
Reason: "NvidiaGPUXid45, Ch00000010",
536+
Message: "[...] NVRM: Xid (PCI:0000:00:00.0): 45, Ch 00000010",
537+
}},
538+
},
539+
},
540+
} {
541+
// Build a monitor with only Source set, then let ApplyDefaultConfiguration fill in the rest.
l := &logMonitor{
542+
config: MonitorConfig{
543+
Source: testSource,
544+
},
545+
}
546+
(&l.config).ApplyDefaultConfiguration()
547+
got := l.generateStatus(test.logs, test.rule)
548+
549+
// DeepEqual compares the whole Status, including the nil-versus-non-nil case.
if !reflect.DeepEqual(test.expected, got) {
550+
// c is the zero-based table index; it is reported one-based.
t.Errorf("case %d %s: expected status %+v, got %+v", c+1, test.name, test.expected, got)
551+
}
552+
}
553+
}
554+
411555
func TestInitializeProblemMetricsOrDie(t *testing.T) {
412556
testCases := []struct {
413557
name string

0 commit comments

Comments
 (0)