Mixed-version cluster compatibility

This commit is contained in:
chrislu
2025-08-10 15:54:30 -07:00
parent d326affc4f
commit 3ef8a9f3b2
3 changed files with 45 additions and 7 deletions

View File

@@ -36,6 +36,25 @@ Steps to apply erasure coding to .dat .idx files
*/
// isGenerationCompatible reports whether a request carrying requestedGeneration
// may be served by an EC volume whose generation is actualGeneration.
//
// A requested generation of 0 is treated as the default/legacy value and is
// accepted against any actual generation, which keeps mixed-version clusters
// working. Every other requested value must equal actualGeneration exactly.
func isGenerationCompatible(actualGeneration, requestedGeneration uint32) bool {
	switch requestedGeneration {
	case 0, actualGeneration:
		// Either the caller did not pin a generation, or it pinned the one we have.
		return true
	default:
		// A specific generation was requested and it differs from ours.
		return false
	}
}
// VolumeEcShardsGenerate generates the .ecx and .ec00 ~ .ec13 files
func (vs *VolumeServer) VolumeEcShardsGenerate(ctx context.Context, req *volume_server_pb.VolumeEcShardsGenerateRequest) (*volume_server_pb.VolumeEcShardsGenerateResponse, error) {
@@ -370,9 +389,9 @@ func (vs *VolumeServer) VolumeEcShardRead(req *volume_server_pb.VolumeEcShardRea
return fmt.Errorf("VolumeEcShardRead not found ec volume id %d", req.VolumeId)
}
// Validate generation matches the request
// Validate generation matches with mixed-version compatibility
requestedGeneration := req.Generation
if ecVolume.Generation != requestedGeneration {
if !isGenerationCompatible(ecVolume.Generation, requestedGeneration) {
return fmt.Errorf("VolumeEcShardRead volume %d generation mismatch: requested %d, found %d",
req.VolumeId, requestedGeneration, ecVolume.Generation)
}

View File

@@ -273,13 +273,23 @@ func (s *Store) cachedLookupEcShardLocations(ecVolume *erasure_coding.EcVolume)
ecVolume.ShardLocationsLock.Lock()
for _, shardIdLocations := range resp.ShardIdLocations {
// Validate that the returned generation matches our request
if shardIdLocations.Generation != ecVolume.Generation {
// Mixed-version compatibility: be more flexible with generation matching
// If we requested generation 0 or if the response has generation 0 (older master),
// be more permissive to support rolling upgrades
generationMatches := shardIdLocations.Generation == ecVolume.Generation
mixedVersionCompatible := (ecVolume.Generation == 0 || shardIdLocations.Generation == 0)
if !generationMatches && !mixedVersionCompatible {
glog.Warningf("received shard locations for generation %d but requested generation %d for volume %d shard %d",
shardIdLocations.Generation, ecVolume.Generation, ecVolume.VolumeId, shardIdLocations.ShardId)
continue // skip mismatched generation shards
}
if !generationMatches && mixedVersionCompatible {
glog.V(1).Infof("accepting shard locations with generation mismatch for mixed-version compatibility: volume %d shard %d response_gen=%d requested_gen=%d",
ecVolume.VolumeId, shardIdLocations.ShardId, shardIdLocations.Generation, ecVolume.Generation)
}
shardId := erasure_coding.ShardId(shardIdLocations.ShardId)
delete(ecVolume.ShardLocations, shardId)
for _, loc := range shardIdLocations.Locations {

View File

@@ -307,35 +307,44 @@ func (t *Topology) ListEcVolumesWithActiveGeneration() map[needle.VolumeId]uint3
}
// LookupEcShardsWithFallback looks up the EC shard locations of volume vid,
// falling back across generations to stay compatible with mixed-version clusters.
//
// Resolution order:
//  1. If requestedGeneration is 0 and GetEcActiveGeneration reports an active
//     generation for vid, that generation becomes the lookup target; otherwise
//     the target is requestedGeneration itself.
//  2. The target generation is looked up via LookupEcShards.
//  3. If that misses and the caller asked for a specific (non-zero) generation,
//     the lookup fails without any fallback.
//  4. If that misses, the caller asked for generation 0, and the target was a
//     non-zero active generation, generation 0 is tried as a last resort.
//
// It returns the locations, the generation they were found under, and true on
// success; on failure it returns nil, 0, false.
func (t *Topology) LookupEcShardsWithFallback(vid needle.VolumeId, requestedGeneration uint32) (locations *EcShardLocations, actualGeneration uint32, found bool) {
	unpinned := requestedGeneration == 0

	// Resolve which generation to try first.
	wanted := requestedGeneration
	if unpinned {
		if active, ok := t.GetEcActiveGeneration(vid); ok {
			wanted = active
			glog.V(4).Infof("LookupEcShardsWithFallback: using active generation %d for volume %d", active, vid)
		}
	}

	// First attempt: the resolved target generation.
	if shards, ok := t.LookupEcShards(vid, wanted); ok {
		if wanted != requestedGeneration {
			glog.V(3).Infof("LookupEcShardsWithFallback: found volume %d generation %d (requested %d)", vid, wanted, requestedGeneration)
		}
		return shards, wanted, true
	}

	switch {
	case !unpinned:
		// A caller that pinned a generation gets a strict answer: no fallback.
		glog.V(2).Infof("LookupEcShardsWithFallback: volume %d generation %d not found, no fallback for specific request", vid, requestedGeneration)
		return nil, 0, false
	case wanted != 0:
		// The active generation was not found; retry generation 0, intended to
		// cover rolling upgrades where generation info may not be recorded yet.
		if shards, ok := t.LookupEcShards(vid, 0); ok {
			glog.V(2).Infof("LookupEcShardsWithFallback: falling back to generation 0 for volume %d (target generation %d not found)", vid, wanted)
			return shards, 0, true
		}
	}

	glog.V(2).Infof("LookupEcShardsWithFallback: volume %d not found in any generation", vid)
	return nil, 0, false
}