diff --git a/server/etcdserver/api/membership/cluster.go b/server/etcdserver/api/membership/cluster.go index 6becdfd62ed..f67d7f2c24d 100644 --- a/server/etcdserver/api/membership/cluster.go +++ b/server/etcdserver/api/membership/cluster.go @@ -783,11 +783,7 @@ func (c *RaftCluster) IsLocalMemberLearner() bool { defer c.Unlock() localMember, ok := c.members[c.localID] if !ok { - c.lg.Panic( - "failed to find local ID in cluster members", - zap.String("cluster-id", c.cid.String()), - zap.String("local-member-id", c.localID.String()), - ) + return false } return localMember.IsLearner } @@ -816,6 +812,9 @@ func (c *RaftCluster) SetDowngradeInfo(d *serverversion.DowngradeInfo, shouldApp // IsMemberExist returns if the member with the given id exists in cluster. func (c *RaftCluster) IsMemberExist(id types.ID) bool { + // gofail: var sleepAfterIsMemberExist struct{} + // defer time.Sleep(time.Second) + c.Lock() defer c.Unlock() _, ok := c.members[id] diff --git a/server/etcdserver/api/v3rpc/interceptor.go b/server/etcdserver/api/v3rpc/interceptor.go index 65e068ebbfc..72916ab676f 100644 --- a/server/etcdserver/api/v3rpc/interceptor.go +++ b/server/etcdserver/api/v3rpc/interceptor.go @@ -49,7 +49,7 @@ func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor { return nil, rpctypes.ErrGRPCNotCapable } - if s.IsMemberExist(s.MemberID()) && s.IsLearner() && !isRPCSupportedForLearner(req) { + if s.IsLearner() && !isRPCSupportedForLearner(req) { return nil, rpctypes.ErrGRPCNotSupportedForLearner } @@ -218,7 +218,7 @@ func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor return rpctypes.ErrGRPCNotCapable } - if s.IsMemberExist(s.MemberID()) && s.IsLearner() && info.FullMethod != snapshotMethod { // learner does not support stream RPC except Snapshot + if s.IsLearner() && info.FullMethod != snapshotMethod { // learner does not support stream RPC except Snapshot return rpctypes.ErrGRPCNotSupportedForLearner } diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go index ba3a3f3ffe1..0615d20e8b3 100644 --- a/server/etcdserver/server.go +++ b/server/etcdserver/server.go @@ -1224,7 +1224,8 @@ func (s *EtcdServer) isLeader() bool { // MoveLeader transfers the leader to the given transferee. func (s *EtcdServer) MoveLeader(ctx context.Context, lead, transferee uint64) error { - if !s.cluster.IsMemberExist(types.ID(transferee)) || s.cluster.Member(types.ID(transferee)).IsLearner { + member := s.cluster.Member(types.ID(transferee)) + if member == nil || member.IsLearner { return errors.ErrBadLeaderTransferee } @@ -1593,9 +1594,9 @@ func (s *EtcdServer) mayRemoveMember(id types.ID) error { } lg := s.Logger() - isLearner := s.cluster.IsMemberExist(id) && s.cluster.Member(id).IsLearner + member := s.cluster.Member(id) // no need to check quorum when removing non-voting member - if isLearner { + if member != nil && member.IsLearner { return nil } diff --git a/tests/integration/cluster_test.go b/tests/integration/cluster_test.go index 29f8ae8dd5d..7711284f72a 100644 --- a/tests/integration/cluster_test.go +++ b/tests/integration/cluster_test.go @@ -518,3 +518,51 @@ func TestSpeedyTerminate(t *testing.T) { case <-donec: } } + +// TestConcurrentRemoveMember demonstrated a panic in mayRemoveMember with +// concurrent calls to MemberRemove. To reliably reproduce the panic, a delay +// needed to be injected in IsMemberExist, which is done using a failpoint. +// After fixing the bug, IsMemberExist is no longer called by mayRemoveMember. +func TestConcurrentRemoveMember(t *testing.T) { + integration.BeforeTest(t, integration.WithFailpoint("sleepAfterIsMemberExist", `return`)) + c := integration.NewCluster(t, &integration.ClusterConfig{Size: 1}) + defer c.Terminate(t) + + addResp, err := c.Members[0].Client.MemberAddAsLearner(context.Background(), []string{"http://localhost:123"}) + if err != nil { + t.Fatal(err) + } + removeID := addResp.Member.ID + done := make(chan struct{}) + go func() { + time.Sleep(time.Second / 2) + c.Members[0].Client.MemberRemove(context.Background(), removeID) + close(done) + }() + if _, err := c.Members[0].Client.MemberRemove(context.Background(), removeID); err != nil { + t.Fatal(err) + } + <-done +} + +func TestConcurrentMoveLeader(t *testing.T) { + integration.BeforeTest(t, integration.WithFailpoint("sleepAfterIsMemberExist", `return`)) + c := integration.NewCluster(t, &integration.ClusterConfig{Size: 1}) + defer c.Terminate(t) + + addResp, err := c.Members[0].Client.MemberAddAsLearner(context.Background(), []string{"http://localhost:123"}) + if err != nil { + t.Fatal(err) + } + removeID := addResp.Member.ID + done := make(chan struct{}) + go func() { + time.Sleep(time.Second / 2) + c.Members[0].Client.MoveLeader(context.Background(), removeID) + close(done) + }() + if _, err := c.Members[0].Client.MemberRemove(context.Background(), removeID); err != nil { + t.Fatal(err) + } + <-done +} diff --git a/tests/robustness/makefile.mk b/tests/robustness/makefile.mk index 8c40436cd2f..09d10108fb6 100644 --- a/tests/robustness/makefile.mk +++ b/tests/robustness/makefile.mk @@ -54,7 +54,7 @@ install-gofail: $(GOPATH)/bin/gofail .PHONY: gofail-enable gofail-enable: $(GOPATH)/bin/gofail - $(GOPATH)/bin/gofail enable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/ + $(GOPATH)/bin/gofail enable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/ server/etcdserver/api/membership/ cd ./server && go get go.etcd.io/gofail@${GOFAIL_VERSION} cd ./etcdutl && go get go.etcd.io/gofail@${GOFAIL_VERSION} cd ./etcdctl && go get go.etcd.io/gofail@${GOFAIL_VERSION} @@ -62,7 +62,7 @@ gofail-enable: $(GOPATH)/bin/gofail .PHONY: gofail-disable gofail-disable: $(GOPATH)/bin/gofail - $(GOPATH)/bin/gofail disable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/ + $(GOPATH)/bin/gofail disable server/etcdserver/ server/lease/leasehttp server/storage/backend/ server/storage/mvcc/ server/storage/wal/ server/etcdserver/api/v3rpc/ server/etcdserver/api/membership/ cd ./server && go mod tidy cd ./etcdutl && go mod tidy cd ./etcdctl && go mod tidy