Shell: support regular expression for collection selection (#7158)

* support regular expression for collection selection

* refactor

* ordering

* fix exact match

* Update command_volume_balance_test.go

* simplify

* Update command_volume_balance.go

* comment
This commit is contained in:
Chris Lu
2025-08-23 11:04:24 -07:00
committed by GitHub
parent a367c39967
commit 41aedaa687
7 changed files with 111 additions and 30 deletions

View File

@@ -6,6 +6,7 @@ import (
"fmt"
"io"
"os"
"regexp"
"strings"
"time"
@@ -40,6 +41,14 @@ func (c *commandVolumeBalance) Help() string {
volume.balance [-collection ALL_COLLECTIONS|EACH_COLLECTION|<collection_name>] [-force] [-dataCenter=<data_center_name>] [-racks=rack_name_one,rack_name_two] [-nodes=192.168.0.1:8080,192.168.0.2:8080]
The -collection parameter supports:
- ALL_COLLECTIONS: balance across all collections
- EACH_COLLECTION: balance each collection separately
- Regular expressions for pattern matching:
* Use exact match: volume.balance -collection="^mybucket$"
* Match multiple buckets: volume.balance -collection="bucket.*"
* Match all user collections: volume.balance -collection="user-.*"
Algorithm:
For each type of volume server (different max volume count limit){
@@ -118,12 +127,23 @@ func (c *commandVolumeBalance) Do(args []string, commandEnv *CommandEnv, writer
return err
}
for _, col := range collections {
if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, col); err != nil {
// Use direct string comparison for exact match (more efficient than regex)
if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, nil, col); err != nil {
return err
}
}
} else if *collection == "ALL_COLLECTIONS" {
// Pass nil pattern for all collections
if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, nil, *collection); err != nil {
return err
}
} else {
if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, *collection); err != nil {
// Compile user-provided pattern
collectionPattern, err := compileCollectionPattern(*collection)
if err != nil {
return fmt.Errorf("invalid collection pattern '%s': %v", *collection, err)
}
if err = c.balanceVolumeServers(diskTypes, volumeReplicas, volumeServers, collectionPattern, *collection); err != nil {
return err
}
}
@@ -131,24 +151,29 @@ func (c *commandVolumeBalance) Do(args []string, commandEnv *CommandEnv, writer
return nil
}
func (c *commandVolumeBalance) balanceVolumeServers(diskTypes []types.DiskType, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, collection string) error {
func (c *commandVolumeBalance) balanceVolumeServers(diskTypes []types.DiskType, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, collectionPattern *regexp.Regexp, collectionName string) error {
for _, diskType := range diskTypes {
if err := c.balanceVolumeServersByDiskType(diskType, volumeReplicas, nodes, collection); err != nil {
if err := c.balanceVolumeServersByDiskType(diskType, volumeReplicas, nodes, collectionPattern, collectionName); err != nil {
return err
}
}
return nil
}
func (c *commandVolumeBalance) balanceVolumeServersByDiskType(diskType types.DiskType, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, collection string) error {
func (c *commandVolumeBalance) balanceVolumeServersByDiskType(diskType types.DiskType, volumeReplicas map[uint32][]*VolumeReplica, nodes []*Node, collectionPattern *regexp.Regexp, collectionName string) error {
for _, n := range nodes {
n.selectVolumes(func(v *master_pb.VolumeInformationMessage) bool {
if collection != "ALL_COLLECTIONS" {
if v.Collection != collection {
return false
if collectionName != "ALL_COLLECTIONS" {
if collectionPattern != nil {
// Use regex pattern matching
if !collectionPattern.MatchString(v.Collection) {
return false
}
} else {
// Use exact string matching (for EACH_COLLECTION)
if v.Collection != collectionName {
return false
}
}
}
if v.DiskType != string(diskType) {