mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2025-09-19 07:19:23 +08:00
remove old raft servers if they don't answer to pings for too long (#3398)
* remove old raft servers if they don't answer to pings for too long
  - add ping durations as options
  - rename ping fields
  - fix some todos
  - get masters through masterclient
  - raft remove server from leader
  - use raft servers to ping them
  - CheckMastersAlive for hashicorp raft only
* prepare blocking ping
* pass waitForReady as param
* pass waitForReady through all functions
* waitForReady works
* refactor
* remove unneeded params
* rollback unneeded changes
* fix
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
package weed_server
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/seaweedfs/seaweedfs/weed/stats"
|
||||
"net/http"
|
||||
"net/http/httputil"
|
||||
"net/url"
|
||||
@@ -12,6 +12,8 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/stats"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/cluster"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb"
|
||||
|
||||
@@ -242,7 +244,6 @@ func (ms *MasterServer) proxyToLeader(f http.HandlerFunc) http.HandlerFunc {
|
||||
}
|
||||
|
||||
func (ms *MasterServer) startAdminScripts() {
|
||||
|
||||
v := util.GetViper()
|
||||
adminScripts := v.GetString("master.maintenance.scripts")
|
||||
if adminScripts == "" {
|
||||
@@ -342,8 +343,10 @@ func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startF
|
||||
|
||||
peerAddress := pb.ServerAddress(update.Address)
|
||||
peerName := string(peerAddress)
|
||||
isLeader := ms.Topo.HashicorpRaft.State() == hashicorpRaft.Leader
|
||||
if update.IsAdd && isLeader {
|
||||
if ms.Topo.HashicorpRaft.State() != hashicorpRaft.Leader {
|
||||
return
|
||||
}
|
||||
if update.IsAdd {
|
||||
raftServerFound := false
|
||||
for _, server := range ms.Topo.HashicorpRaft.GetConfiguration().Configuration().Servers {
|
||||
if string(server.ID) == peerName {
|
||||
@@ -356,5 +359,27 @@ func (ms *MasterServer) OnPeerUpdate(update *master_pb.ClusterNodeUpdate, startF
|
||||
hashicorpRaft.ServerID(peerName),
|
||||
hashicorpRaft.ServerAddress(peerAddress.ToGrpcAddress()), 0, 0)
|
||||
}
|
||||
} else {
|
||||
pb.WithMasterClient(false, peerAddress, ms.grpcDialOption, true, func(client master_pb.SeaweedClient) error {
|
||||
ctx, cancel := context.WithTimeout(context.TODO(), time.Minute*72)
|
||||
defer cancel()
|
||||
if _, err := client.Ping(ctx, &master_pb.PingRequest{Target: string(peerAddress), TargetType: cluster.MasterType}); err != nil {
|
||||
glog.V(0).Infof("master %s didn't respond to pings. remove raft server", peerName)
|
||||
if err := ms.MasterClient.WithClient(false, func(client master_pb.SeaweedClient) error {
|
||||
_, err := client.RaftRemoveServer(context.Background(), &master_pb.RaftRemoveServerRequest{
|
||||
Id: peerName,
|
||||
Force: false,
|
||||
})
|
||||
return err
|
||||
}); err != nil {
|
||||
glog.Warningf("failed removing old raft server: %v", err)
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
glog.V(0).Infof("master %s successfully responded to ping", peerName)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user