fix hanging task detail page

This commit is contained in:
chrislu
2025-08-11 23:55:24 -07:00
parent 56db0a081a
commit c8c758e639
2 changed files with 105 additions and 49 deletions

View File

@@ -20,6 +20,7 @@ import (
"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/mq_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/worker_pb"
"github.com/seaweedfs/seaweedfs/weed/security"
"github.com/seaweedfs/seaweedfs/weed/util"
"github.com/seaweedfs/seaweedfs/weed/wdclient"
@@ -1198,47 +1199,75 @@ func (as *AdminServer) GetMaintenanceTaskDetail(taskID string) (*maintenance.Tas
// Get execution logs from worker if task is active/completed and worker is connected
if task.Status == maintenance.TaskStatusInProgress || task.Status == maintenance.TaskStatusCompleted {
if as.workerGrpcServer != nil && task.WorkerID != "" {
workerLogs, err := as.workerGrpcServer.RequestTaskLogs(task.WorkerID, taskID, 100, "")
if err == nil && len(workerLogs) > 0 {
// Convert worker logs to maintenance logs
for _, workerLog := range workerLogs {
maintenanceLog := &maintenance.TaskExecutionLog{
Timestamp: time.Unix(workerLog.Timestamp, 0),
Level: workerLog.Level,
Message: workerLog.Message,
Source: "worker",
// Add additional timeout protection for worker log requests
type logResult struct {
logs []*worker_pb.TaskLogEntry
err error
}
logChan := make(chan logResult, 1)
go func() {
workerLogs, err := as.workerGrpcServer.RequestTaskLogs(task.WorkerID, taskID, 100, "")
logChan <- logResult{logs: workerLogs, err: err}
}()
// Wait for logs with timeout
select {
case result := <-logChan:
if result.err == nil && len(result.logs) > 0 {
workerLogs := result.logs
// Convert worker logs to maintenance logs
for _, workerLog := range workerLogs {
maintenanceLog := &maintenance.TaskExecutionLog{
Timestamp: time.Unix(workerLog.Timestamp, 0),
Level: workerLog.Level,
Message: workerLog.Message,
Source: "worker",
TaskID: taskID,
WorkerID: task.WorkerID,
}
// carry structured fields if present
if len(workerLog.Fields) > 0 {
maintenanceLog.Fields = make(map[string]string, len(workerLog.Fields))
for k, v := range workerLog.Fields {
maintenanceLog.Fields[k] = v
}
}
// carry optional progress/status
if workerLog.Progress != 0 {
p := float64(workerLog.Progress)
maintenanceLog.Progress = &p
}
if workerLog.Status != "" {
maintenanceLog.Status = workerLog.Status
}
taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, maintenanceLog)
}
} else if result.err != nil {
// Add a diagnostic log entry when worker logs cannot be retrieved
diagnosticLog := &maintenance.TaskExecutionLog{
Timestamp: time.Now(),
Level: "WARNING",
Message: fmt.Sprintf("Failed to retrieve worker logs: %v", result.err),
Source: "admin",
TaskID: taskID,
WorkerID: task.WorkerID,
}
// carry structured fields if present
if len(workerLog.Fields) > 0 {
maintenanceLog.Fields = make(map[string]string, len(workerLog.Fields))
for k, v := range workerLog.Fields {
maintenanceLog.Fields[k] = v
}
}
// carry optional progress/status
if workerLog.Progress != 0 {
p := float64(workerLog.Progress)
maintenanceLog.Progress = &p
}
if workerLog.Status != "" {
maintenanceLog.Status = workerLog.Status
}
taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, maintenanceLog)
taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, diagnosticLog)
glog.V(1).Infof("Failed to get worker logs for task %s from worker %s: %v", taskID, task.WorkerID, result.err)
}
} else if err != nil {
// Add a diagnostic log entry when worker logs cannot be retrieved
diagnosticLog := &maintenance.TaskExecutionLog{
case <-time.After(8 * time.Second):
// Timeout getting logs from worker
timeoutLog := &maintenance.TaskExecutionLog{
Timestamp: time.Now(),
Level: "WARNING",
Message: fmt.Sprintf("Failed to retrieve worker logs: %v", err),
Message: "Timeout retrieving worker logs - worker may be unresponsive or busy",
Source: "admin",
TaskID: taskID,
WorkerID: task.WorkerID,
}
taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, diagnosticLog)
glog.V(1).Infof("Failed to get worker logs for task %s from worker %s: %v", taskID, task.WorkerID, err)
taskDetail.ExecutionLogs = append(taskDetail.ExecutionLogs, timeoutLog)
glog.Warningf("Timeout getting worker logs for task %s from worker %s", taskID, task.WorkerID)
}
} else {
// Add diagnostic information when worker is not available

View File

@@ -38,26 +38,53 @@ func (h *MaintenanceHandlers) ShowTaskDetail(c *gin.Context) {
taskID := c.Param("id")
glog.Infof("DEBUG ShowTaskDetail: Starting for task ID: %s", taskID)
taskDetail, err := h.adminServer.GetMaintenanceTaskDetail(taskID)
if err != nil {
glog.Errorf("DEBUG ShowTaskDetail: error getting task detail for %s: %v", taskID, err)
c.String(http.StatusNotFound, "Task not found: %s (Error: %v)", taskID, err)
// Add timeout to prevent indefinite hangs when worker is unresponsive
ctx, cancel := context.WithTimeout(c.Request.Context(), 15*time.Second)
defer cancel()
// Use a channel to handle timeout for task detail retrieval
type result struct {
taskDetail *maintenance.TaskDetailData
err error
}
resultChan := make(chan result, 1)
go func() {
taskDetail, err := h.adminServer.GetMaintenanceTaskDetail(taskID)
resultChan <- result{taskDetail: taskDetail, err: err}
}()
select {
case res := <-resultChan:
if res.err != nil {
glog.Errorf("DEBUG ShowTaskDetail: error getting task detail for %s: %v", taskID, res.err)
c.String(http.StatusNotFound, "Task not found: %s (Error: %v)", taskID, res.err)
return
}
glog.Infof("DEBUG ShowTaskDetail: got task detail for %s, task type: %s, status: %s", taskID, res.taskDetail.Task.Type, res.taskDetail.Task.Status)
c.Header("Content-Type", "text/html")
taskDetailComponent := app.TaskDetail(res.taskDetail)
layoutComponent := layout.Layout(c, taskDetailComponent)
err := layoutComponent.Render(ctx, c.Writer)
if err != nil {
glog.Errorf("DEBUG ShowTaskDetail: render error: %v", err)
c.String(http.StatusInternalServerError, "Failed to render template: %v", err)
return
}
glog.Infof("DEBUG ShowTaskDetail: template rendered successfully for task %s", taskID)
case <-ctx.Done():
glog.Warningf("ShowTaskDetail: timeout waiting for task detail data for task %s", taskID)
c.JSON(http.StatusRequestTimeout, gin.H{
"error": "Request timeout - task detail retrieval took too long. This may indicate the worker is unresponsive or stuck.",
"suggestion": "Try refreshing the page or check if the worker executing this task is responsive. If the task is stuck, it may need to be cancelled manually.",
"task_id": taskID,
})
return
}
glog.Infof("DEBUG ShowTaskDetail: got task detail for %s, task type: %s, status: %s", taskID, taskDetail.Task.Type, taskDetail.Task.Status)
c.Header("Content-Type", "text/html")
taskDetailComponent := app.TaskDetail(taskDetail)
layoutComponent := layout.Layout(c, taskDetailComponent)
err = layoutComponent.Render(c.Request.Context(), c.Writer)
if err != nil {
glog.Errorf("DEBUG ShowTaskDetail: render error: %v", err)
c.String(http.StatusInternalServerError, "Failed to render template: %v", err)
return
}
glog.Infof("DEBUG ShowTaskDetail: template rendered successfully for task %s", taskID)
}
// ShowMaintenanceQueue displays the maintenance queue page