Mirror of https://github.com/seaweedfs/seaweedfs.git (synced 2025-09-23 06:33:32 +08:00)

* refactoring
* add ec shard size
* address comments
* passing task id: There seems to be a disconnect between the pending tasks created in ActiveTopology and the TaskDetectionResult returned by this function. A taskID is generated locally and used to create pending tasks via AddPendingECShardTask, but this taskID is not stored in the TaskDetectionResult or passed along in any way. This makes it impossible for the worker that eventually executes the task to know which pending task in ActiveTopology it corresponds to. Without the correct taskID, the worker cannot call AssignTask or CompleteTask on the master, breaking the entire task lifecycle and capacity management feature. A potential solution is to add a TaskID field to TaskDetectionResult and worker_pb.TaskParams, ensuring the ID is propagated from detection to execution.
* 1 source, multiple destinations
* task supports multiple sources and destinations
* ec needs to clean up previous shards
* use erasure coding constants
* getPlanningCapacityUnsafe and getEffectiveAvailableCapacityUnsafe should return StorageSlotChange for calculation
* use CanAccommodate to calculate
* remove dead code
* address comments
* fix Mutex Copying in Protobuf Structs
* use constants
* fix estimatedSize: The calculation for estimatedSize only considers source.EstimatedSize and dest.StorageChange, but omits dest.EstimatedSize. The TaskDestination struct has an EstimatedSize field, which seems to be ignored here. This could lead to an incorrect estimate of the total size of data involved in tasks on a disk. The loop should probably also include estimatedSize += dest.EstimatedSize (see the sketch after this commit message).
* at.assignTaskToDisk(task)
* refactoring
* Update weed/admin/topology/internal.go (Co-authored-by: gemini-code-assist[bot])
* fail fast
* fix compilation
* Update weed/worker/tasks/erasure_coding/detection.go (Co-authored-by: gemini-code-assist[bot])
* indexes for volume and shard locations
* dedup with ToVolumeSlots
* return an additional boolean to indicate success, or an error
* Update abstract_sql_store.go
* fix
* Update weed/worker/tasks/erasure_coding/detection.go (Co-authored-by: gemini-code-assist[bot])
* Update weed/admin/topology/task_management.go (Co-authored-by: gemini-code-assist[bot])
* faster findVolumeDisk
* Update weed/worker/tasks/erasure_coding/detection.go (Co-authored-by: Copilot)
* Update weed/admin/topology/storage_slot_test.go (Co-authored-by: Copilot)
* refactor
* simplify
* remove unused GetDiskStorageImpact function
* refactor
* add comments
* Update weed/admin/topology/storage_impact.go (Co-authored-by: gemini-code-assist[bot])
* Update weed/admin/topology/storage_slot_test.go (Co-authored-by: gemini-code-assist[bot])
* Update storage_impact.go
* AddPendingTask: The unified AddPendingTask function now serves as the single entry point for all task creation, consolidating the previously separate functions while maintaining full functionality and improving code organization.

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
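For context on the "fix estimatedSize" item above, a minimal sketch of the corrected accumulation. The TaskSource stand-in and the helper name are assumptions for illustration, not the exact ActiveTopology types; the point is only that destination sizes contribute alongside source sizes.

// Simplified stand-in types; the real structs live in the ActiveTopology code.
type TaskSource struct {
	EstimatedSize int64
}

type TaskDestination struct {
	EstimatedSize int64
}

// estimateTaskSize illustrates the corrected loop: it adds dest.EstimatedSize,
// which the original calculation omitted.
func estimateTaskSize(sources []TaskSource, destinations []TaskDestination) int64 {
	var estimatedSize int64
	for _, source := range sources {
		estimatedSize += source.EstimatedSize
	}
	for _, dest := range destinations {
		estimatedSize += dest.EstimatedSize // previously missing
	}
	return estimatedSize
}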
120 lines · 4.2 KiB · Go
package vacuum

import (
	"fmt"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
	"github.com/seaweedfs/seaweedfs/weed/worker/tasks/base"
	"github.com/seaweedfs/seaweedfs/weed/worker/types"
)

// Detection implements the detection logic for vacuum tasks
func Detection(metrics []*types.VolumeHealthMetrics, clusterInfo *types.ClusterInfo, config base.TaskConfig) ([]*types.TaskDetectionResult, error) {
	if !config.IsEnabled() {
		return nil, nil
	}

	vacuumConfig := config.(*Config)
	var results []*types.TaskDetectionResult
	minVolumeAge := time.Duration(vacuumConfig.MinVolumeAgeSeconds) * time.Second

	debugCount := 0
	skippedDueToGarbage := 0
	skippedDueToAge := 0

	for _, metric := range metrics {
		// Check if volume needs vacuum
		if metric.GarbageRatio >= vacuumConfig.GarbageThreshold && metric.Age >= minVolumeAge {
			priority := types.TaskPriorityNormal
			if metric.GarbageRatio > 0.6 {
				priority = types.TaskPriorityHigh
			}

			// Generate task ID for future ActiveTopology integration
			taskID := fmt.Sprintf("vacuum_vol_%d_%d", metric.VolumeID, time.Now().Unix())

			result := &types.TaskDetectionResult{
				TaskID:     taskID, // For future ActiveTopology integration
				TaskType:   types.TaskTypeVacuum,
				VolumeID:   metric.VolumeID,
				Server:     metric.Server,
				Collection: metric.Collection,
				Priority:   priority,
				Reason:     "Volume has excessive garbage requiring vacuum",
				ScheduleAt: time.Now(),
			}

			// Create typed parameters for vacuum task
			result.TypedParams = createVacuumTaskParams(result, metric, vacuumConfig)
			results = append(results, result)
		} else {
			// Debug why volume was not selected
			if debugCount < 5 { // Limit debug output to first 5 volumes
				if metric.GarbageRatio < vacuumConfig.GarbageThreshold {
					skippedDueToGarbage++
				}
				if metric.Age < minVolumeAge {
					skippedDueToAge++
				}
			}
			debugCount++
		}
	}

	// Log debug summary if no tasks were created
	if len(results) == 0 && len(metrics) > 0 {
		totalVolumes := len(metrics)
		glog.Infof("VACUUM: No tasks created for %d volumes. Threshold=%.2f%%, MinAge=%s. Skipped: %d (garbage<threshold), %d (age<minimum)",
			totalVolumes, vacuumConfig.GarbageThreshold*100, minVolumeAge, skippedDueToGarbage, skippedDueToAge)

		// Show details for first few volumes
		for i, metric := range metrics {
			if i >= 3 { // Limit to first 3 volumes
				break
			}
			glog.Infof("VACUUM: Volume %d: garbage=%.2f%% (need ≥%.2f%%), age=%s (need ≥%s)",
				metric.VolumeID, metric.GarbageRatio*100, vacuumConfig.GarbageThreshold*100,
				metric.Age.Truncate(time.Minute), minVolumeAge.Truncate(time.Minute))
		}
	}

	return results, nil
}

// createVacuumTaskParams creates typed parameters for vacuum tasks
// This function is moved from MaintenanceIntegration.createVacuumTaskParams to the detection logic
func createVacuumTaskParams(task *types.TaskDetectionResult, metric *types.VolumeHealthMetrics, vacuumConfig *Config) *worker_pb.TaskParams {
	// Use configured values or defaults
	garbageThreshold := 0.3                    // Default 30%
	verifyChecksum := true                     // Default to verify
	batchSize := int32(1000)                   // Default batch size
	workingDir := "/tmp/seaweedfs_vacuum_work" // Default working directory

	if vacuumConfig != nil {
		garbageThreshold = vacuumConfig.GarbageThreshold
		// Note: VacuumTaskConfig has GarbageThreshold, MinVolumeAgeHours, MinIntervalSeconds
		// Other fields like VerifyChecksum, BatchSize, WorkingDir would need to be added
		// to the protobuf definition if they should be configurable
	}

	// Create typed protobuf parameters
	return &worker_pb.TaskParams{
		TaskId:     task.TaskID, // Link to ActiveTopology pending task (if integrated)
		VolumeId:   task.VolumeID,
		Server:     task.Server,
		Collection: task.Collection,
		VolumeSize: metric.Size, // Store original volume size for tracking changes
		TaskParams: &worker_pb.TaskParams_VacuumParams{
			VacuumParams: &worker_pb.VacuumTaskParams{
				GarbageThreshold: garbageThreshold,
				ForceVacuum:      false,
				BatchSize:        batchSize,
				WorkingDir:       workingDir,
				VerifyChecksum:   verifyChecksum,
			},
		},
	}
}
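For completeness, a minimal sketch of how a caller might invoke the Detection entry point above. The helper name and log messages are illustrative assumptions; cfg is assumed to be an already-loaded vacuum configuration, and clusterInfo is passed as nil because the detection logic above never dereferences it. If placed in the same package as the code above, no additional imports are required.

// runVacuumDetection is a hypothetical call site for Detection, shown only to
// illustrate the detection flow; it is not part of the upstream file.
func runVacuumDetection(metrics []*types.VolumeHealthMetrics, cfg base.TaskConfig) {
	// clusterInfo is unused by vacuum detection, so nil is acceptable here.
	results, err := Detection(metrics, nil, cfg)
	if err != nil {
		glog.Errorf("vacuum detection failed: %v", err)
		return
	}
	for _, r := range results {
		glog.Infof("vacuum task %s: volume %d on %s (priority %v)", r.TaskID, r.VolumeID, r.Server, r.Priority)
	}
}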