Skip to content

Commit c58e6f1

Browse files
authored
Merge pull request #20894 from ahrtr/20251106_learner
Fix the issue that `--force-new-cluster` can't clean up learner after creating v2 snapshot
2 parents 9f18836 + c9d06e9 commit c58e6f1

File tree

2 files changed

+70
-0
lines changed

2 files changed

+70
-0
lines changed

server/storage/util.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@ func GetEffectiveNodeIDsFromWALEntries(lg *zap.Logger, snap *raftpb.Snapshot, en
121121
for _, id := range snap.Metadata.ConfState.Voters {
122122
ids[id] = true
123123
}
124+
for _, id := range snap.Metadata.ConfState.Learners {
125+
ids[id] = true
126+
}
124127
}
125128
for _, e := range ents {
126129
if e.Type != raftpb.EntryConfChange {

tests/e2e/force_new_cluster_test.go

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package e2e
1818

1919
import (
2020
"encoding/json"
21+
"strings"
2122
"testing"
2223
"time"
2324

@@ -29,6 +30,7 @@ import (
2930
"go.etcd.io/etcd/server/v3/storage/schema"
3031
"go.etcd.io/etcd/tests/v3/framework/config"
3132
"go.etcd.io/etcd/tests/v3/framework/e2e"
33+
"go.etcd.io/etcd/tests/v3/framework/testutils"
3234
)
3335

3436
// TestForceNewCluster verifies that etcd works as expected when started with --force-new-cluster.
@@ -111,6 +113,71 @@ func TestForceNewCluster_MemberCount(t *testing.T) {
111113
require.Len(t, members, 1)
112114
}
113115

116+
// TestForceNewCluster_AddLearner_MemberCount verifies that `--force-new-cluster`
117+
// should always be able to clean up all other members, including learners.
118+
func TestForceNewCluster_AddLearner_MemberCount(t *testing.T) {
119+
e2e.BeforeTest(t)
120+
121+
testCases := []struct {
122+
name string
123+
snapcount int
124+
}{
125+
{
126+
name: "no snapshot after adding learner",
127+
snapcount: 0,
128+
},
129+
{
130+
name: "create a snapshot after adding learner",
131+
snapcount: 5,
132+
},
133+
}
134+
135+
for _, tc := range testCases {
136+
t.Run(tc.name, func(t *testing.T) {
137+
cfg := e2e.NewConfig(e2e.WithClusterSize(3))
138+
epc, err := e2e.NewEtcdProcessCluster(t.Context(), t, e2e.WithConfig(cfg), e2e.WithSnapshotCount(uint64(tc.snapcount)), e2e.WithKeepDataDir(true))
139+
require.NoError(t, err)
140+
141+
t.Log("Adding a learner member")
142+
testutils.ExecuteWithTimeout(t, 1*time.Minute, func() {
143+
for {
144+
_, aerr := epc.StartNewProc(t.Context(), nil, t, true)
145+
if aerr != nil {
146+
if strings.Contains(aerr.Error(), "etcdserver: unhealthy cluster") {
147+
time.Sleep(1 * time.Second)
148+
continue
149+
}
150+
}
151+
break
152+
}
153+
})
154+
155+
for i := 0; i < tc.snapcount; i++ {
156+
werr := epc.Etcdctl().Put(t.Context(), "foo", "bar", config.PutOptions{})
157+
require.NoError(t, werr)
158+
}
159+
require.NoError(t, epc.Close())
160+
161+
m := epc.Procs[0]
162+
t.Logf("Forcibly create a one-member cluster with member: %s", m.Config().Name)
163+
m.Config().Args = append(m.Config().Args, "--force-new-cluster")
164+
require.NoError(t, m.Start(t.Context()))
165+
166+
t.Log("Restarting the member")
167+
require.NoError(t, m.Restart(t.Context()))
168+
defer func() {
169+
t.Log("Closing the member")
170+
require.NoError(t, m.Close())
171+
}()
172+
173+
t.Log("Checking member count")
174+
resp, merr := m.Etcdctl().MemberList(t.Context(), false)
175+
require.NoError(t, merr)
176+
require.Len(t, resp.Members, 1)
177+
})
178+
}
179+
}
180+
114181
func mustReadMembersFromBoltDB(t *testing.T, dataDir string) []*membership.Member {
115182
dbPath := datadir.ToBackendFileName(dataDir)
116183
db, err := bbolt.Open(dbPath, 0o400, &bbolt.Options{ReadOnly: true})

0 commit comments

Comments
 (0)