Skip to content

Commit 6421fe4

Browse files
PMM-4547 Replicaset lag incorrect (#218)
* PMM-4547 Replicaset lag incorrect * PMM-4547 Secondary lag calc overflow * PMM-4547 Test commented out * PMM-4547 Test commented out * PMM-4547 Removed unused func
1 parent 4f04265 commit 6421fe4

File tree

3 files changed

+137
-1
lines changed

3 files changed

+137
-1
lines changed

exporter/secondary_lag_test.go

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
package exporter
2+
3+
import (
4+
"context"
5+
"testing"
6+
"time"
7+
8+
dto "github.com/prometheus/client_model/go"
9+
"github.com/stretchr/testify/assert"
10+
"github.com/stretchr/testify/require"
11+
"go.mongodb.org/mongo-driver/bson"
12+
"go.mongodb.org/mongo-driver/bson/primitive"
13+
14+
"github.com/percona/mongodb_exporter/internal/tu"
15+
)
16+
17+
type ReplicasetConfig struct {
18+
Config RSConfig `bson:"config"`
19+
}
20+
21+
type RSConfig struct {
22+
ID string `bson:"_id"`
23+
Version int `bson:"version"`
24+
ProtocolVersion int `bson:"protocolVersion"`
25+
WriteConcernMajorityJournalDefault bool `bson:"writeConcernMajorityJournalDefault"`
26+
Members []struct {
27+
ID int `bson:"_id"`
28+
Host string `bson:"host"`
29+
ArbiterOnly bool `bson:"arbiterOnly"`
30+
BuildIndexes bool `bson:"buildIndexes"`
31+
Hidden bool `bson:"hidden"`
32+
Priority int `bson:"priority"`
33+
Tags struct {
34+
} `bson:"tags"`
35+
SlaveDelay int `bson:"slaveDelay"`
36+
Votes int `bson:"votes"`
37+
} `bson:"members"`
38+
Settings struct {
39+
ChainingAllowed bool `bson:"chainingAllowed"`
40+
HeartbeatIntervalMillis int `bson:"heartbeatIntervalMillis"`
41+
HeartbeatTimeoutSecs int `bson:"heartbeatTimeoutSecs"`
42+
ElectionTimeoutMillis int `bson:"electionTimeoutMillis"`
43+
CatchUpTimeoutMillis int `bson:"catchUpTimeoutMillis"`
44+
CatchUpTakeoverDelayMillis int `bson:"catchUpTakeoverDelayMillis"`
45+
GetLastErrorModes struct {
46+
} `bson:"getLastErrorModes"`
47+
GetLastErrorDefaults struct {
48+
W int `bson:"w"`
49+
Wtimeout int `bson:"wtimeout"`
50+
} `bson:"getLastErrorDefaults"`
51+
ReplicaSetID primitive.ObjectID `bson:"replicaSetId"`
52+
} `bson:"settings"`
53+
}
54+
55+
func TestSecondaryLag(t *testing.T) {
56+
t.Skip("This is failing in GitHub actions. Cannot make secondary to lag behind")
57+
secondsBehind := 3
58+
sleep := 2
59+
ctx, cancel := context.WithTimeout(context.Background(), time.Duration((secondsBehind*2)+sleep)*time.Second)
60+
defer cancel()
61+
62+
client := tu.DefaultTestClient(ctx, t)
63+
64+
var rsConf, rsConfOld ReplicasetConfig
65+
var gg interface{}
66+
67+
res := client.Database("admin").RunCommand(ctx, primitive.M{"replSetGetConfig": 1})
68+
require.NoError(t, res.Err())
69+
70+
err := res.Decode(&gg) // To restore config after test
71+
assert.NoError(t, err)
72+
73+
err = res.Decode(&rsConf)
74+
assert.NoError(t, err)
75+
76+
rsConf.Config.Members[1].Priority = 0
77+
rsConf.Config.Members[1].Hidden = true
78+
rsConf.Config.Members[1].SlaveDelay = secondsBehind
79+
rsConf.Config.Version++
80+
81+
var replSetReconfig struct {
82+
OK int `bson:"ok"`
83+
}
84+
err = client.Database("admin").RunCommand(ctx, primitive.M{"replSetReconfig": rsConf.Config}).Decode(&replSetReconfig)
85+
assert.NoError(t, err)
86+
87+
res = client.Database("admin").RunCommand(ctx, primitive.M{"replSetGetConfig": 1})
88+
require.NoError(t, res.Err())
89+
90+
// Generate documents so oplog is forced to have operations and the lag becomes real, otherwise
91+
// primary and secondary oplogs are the same. Generate more than one doc to ensure oplog is updated
92+
// quickly for the test.
93+
for i := 0; i < 100; i++ {
94+
_, err = client.Database("test").Collection("testc1").InsertOne(ctx, bson.M{"s": 1})
95+
require.NoError(t, err)
96+
time.Sleep(20 * time.Millisecond)
97+
}
98+
err = client.Database("test").Drop(ctx)
99+
assert.NoError(t, err)
100+
101+
err = res.Decode(&rsConfOld) // To restore config after test
102+
assert.NoError(t, err)
103+
104+
msclient := tu.TestClient(ctx, tu.MongoDBS1Secondary1Port, t)
105+
var m bson.M
106+
107+
cmd := bson.D{{Key: "getDiagnosticData", Value: "1"}}
108+
res = msclient.Database("admin").RunCommand(ctx, cmd)
109+
110+
err = res.Decode(&m)
111+
assert.NoError(t, err)
112+
113+
m, _ = m["data"].(bson.M)
114+
lag := replicationLag(m)
115+
116+
metric := &dto.Metric{}
117+
err = lag.Write(metric)
118+
assert.NoError(t, err)
119+
// Secondary is not exactly secondsBehind behind master
120+
assert.True(t, *metric.Gauge.Value > 0)
121+
122+
rsConfOld.Config.Version = rsConf.Config.Version + 1
123+
err = client.Database("admin").RunCommand(ctx, primitive.M{"replSetReconfig": rsConfOld.Config}).Decode(&replSetReconfig)
124+
assert.NoError(t, err)
125+
}

exporter/v1_compatibility.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -854,6 +854,7 @@ func replicationLag(m bson.M) prometheus.Metric {
854854
if !ok {
855855
return nil
856856
}
857+
857858
for _, member := range members {
858859
if statestr, ok := member.(bson.M)["stateStr"].(string); ok && statestr == "PRIMARY" {
859860
if optime, ok := member.(bson.M)["optime"].(bson.M); ok {
@@ -883,7 +884,13 @@ func replicationLag(m bson.M) prometheus.Metric {
883884
return nil
884885
}
885886

886-
val := float64(primaryTS.T - selfTS.T)
887+
var val float64
888+
if primaryTS.T > selfTS.T {
889+
val = float64(primaryTS.T - selfTS.T)
890+
} else {
891+
val = float64(selfTS.T - primaryTS.T)
892+
}
893+
887894
set, _ := replSetGetStatus["set"].(string)
888895

889896
metricName := "mongodb_mongod_replset_member_replication_lag"

internal/tu/testutils.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ const (
3838
MongosPort = "17000"
3939
// MongoDBS1PrimaryPort MongoDB Shard 1 Primary Port.
4040
MongoDBS1PrimaryPort = "17001"
41+
// MongoDBS1Secondary1Port MongoDB Shard 1 Secondary 1 Port.
42+
MongoDBS1Secondary1Port = "17002"
43+
// MongoDBS1Secondary2Port MongoDB Shard 1 Secondary 2 Port.
44+
MongoDBS1Secondary2Port = "17003"
4145
// MongoDBStandAlonePort MongoDB stand alone instance Port.
4246
MongoDBStandAlonePort = "27017"
4347
)

0 commit comments

Comments
 (0)