only recreate the subscriber session if we need to seek backward (requested offset < current offset), not on any offset mismatch

chrislu
2025-10-14 00:26:55 -07:00
parent 6947d906a8
commit 6ef2f66198
4 changed files with 152 additions and 104 deletions

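The rule in the commit title, reduced to a minimal sketch with invented names (not the actual SeaweedFS identifiers): reuse the existing subscriber session for exact matches and forward reads; recreate only when the consumer asks for an offset behind the session's current position.

package main

import "fmt"

// needRecreate captures the rule in the commit title: an existing subscriber
// session is reused for exact matches and forward reads; only a backward seek
// (requested offset behind the session's current offset) forces recreation.
// Names here are illustrative, not the actual SeaweedFS identifiers.
func needRecreate(sessionOffset, requestedOffset int64) bool {
    return requestedOffset < sessionOffset
}

func main() {
    fmt.Println(needRecreate(100, 105)) // false: read forward on the same session
    fmt.Println(needRecreate(100, 100)) // false: already positioned correctly
    fmt.Println(needRecreate(100, 42))  // true: backward seek, recreate (or serve from cache)
}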

@@ -140,6 +140,10 @@ func (k *KafkaGoClient) ConsumeWithGroup(topicName, groupID string, expectedCoun
})
defer reader.Close()
// Log the initial offset position
offset := reader.Offset()
k.t.Logf("Consumer group reader created for group %s, initial offset: %d", groupID, offset)
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
@@ -151,14 +155,17 @@ func (k *KafkaGoClient) ConsumeWithGroup(topicName, groupID string, expectedCoun
return messages, fmt.Errorf("read message %d: %w", i, err)
}
messages = append(messages, msg)
k.t.Logf(" Fetched message %d: offset=%d, partition=%d", i, msg.Offset, msg.Partition)
// Commit with simple retry to handle transient connection churn
var commitErr error
for attempt := 0; attempt < 3; attempt++ {
commitErr = reader.CommitMessages(ctx, msg)
if commitErr == nil {
k.t.Logf(" ✓ Committed offset %d (attempt %d)", msg.Offset, attempt+1)
break
}
k.t.Logf(" × Commit attempt %d failed for offset %d: %v", attempt+1, msg.Offset, commitErr)
// brief backoff
time.Sleep(time.Duration(50*(1<<attempt)) * time.Millisecond)
}

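The commit loop above is a plain retry with exponential backoff. A small self-contained sketch of the same pattern, with a helper name that is invented rather than part of kafka-go or SeaweedFS:

package main

import (
    "errors"
    "fmt"
    "time"
)

// retryWithBackoff retries fn up to attempts times, sleeping 50ms, 100ms, 200ms
// between failures (the same 50*(1<<attempt) millisecond schedule as the test client above).
func retryWithBackoff(attempts int, fn func() error) error {
    var err error
    for attempt := 0; attempt < attempts; attempt++ {
        if err = fn(); err == nil {
            return nil
        }
        time.Sleep(time.Duration(50*(1<<attempt)) * time.Millisecond)
    }
    return err
}

func main() {
    calls := 0
    err := retryWithBackoff(3, func() error {
        calls++
        if calls < 3 {
            return errors.New("transient connection churn")
        }
        return nil // succeeds on the third attempt
    })
    fmt.Println(calls, err) // 3 <nil>
}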

@@ -95,74 +95,79 @@ func (bc *BrokerClient) GetOrCreateSubscriber(topic string, partition int32, sta
bc.subscribersLock.RLock()
if session, exists := bc.subscribers[key]; exists {
// Check if we need to recreate the session
if session.StartOffset != startOffset {
// CRITICAL FIX: Check cache first before recreating
// If the requested offset is in cache, we can reuse the session
session.mu.Lock()
canUseCache := false
// CRITICAL: Only recreate if we need to seek BACKWARD
// Forward reads (startOffset >= session.StartOffset) can use the existing session
session.mu.Lock()
currentOffset := session.StartOffset
session.mu.Unlock()
if len(session.consumedRecords) > 0 {
cacheStartOffset := session.consumedRecords[0].Offset
cacheEndOffset := session.consumedRecords[len(session.consumedRecords)-1].Offset
if startOffset >= cacheStartOffset && startOffset <= cacheEndOffset {
canUseCache = true
glog.V(2).Infof("[FETCH] Session offset mismatch for %s (session=%d, requested=%d), but offset is in cache [%d-%d]",
key, session.StartOffset, startOffset, cacheStartOffset, cacheEndOffset)
}
}
session.mu.Unlock()
if canUseCache {
// Offset is in cache, reuse session
bc.subscribersLock.RUnlock()
return session, nil
}
// Not in cache - need to recreate session at the requested offset
glog.V(0).Infof("[FETCH] Recreating session for %s: session at %d, requested %d (not in cache)",
key, session.StartOffset, startOffset)
if startOffset >= currentOffset {
// Can read forward from existing session, or already at requested offset
bc.subscribersLock.RUnlock()
glog.V(2).Infof("[FETCH] Reusing existing session for %s: session at %d, requested %d (can read forward)",
key, currentOffset, startOffset)
return session, nil
}
// Close and delete the old session
bc.subscribersLock.Lock()
// CRITICAL: Double-check if another thread already recreated the session at the desired offset
// This prevents multiple concurrent threads from all trying to recreate the same session
if existingSession, exists := bc.subscribers[key]; exists {
existingSession.mu.Lock()
existingOffset := existingSession.StartOffset
existingSession.mu.Unlock()
// Check if the session was already recreated at (or before) the requested offset
if existingOffset <= startOffset {
bc.subscribersLock.Unlock()
glog.V(1).Infof("[FETCH] Session already recreated by another thread at offset %d (requested %d)", existingOffset, startOffset)
// Re-acquire the existing session and continue
return existingSession, nil
}
// Session still needs recreation - close it
if existingSession.Stream != nil {
_ = existingSession.Stream.CloseSend()
}
if existingSession.Cancel != nil {
existingSession.Cancel()
}
delete(bc.subscribers, key)
// startOffset < currentOffset: need to seek backward
// Check cache first before recreating
session.mu.Lock()
canUseCache := false
if len(session.consumedRecords) > 0 {
cacheStartOffset := session.consumedRecords[0].Offset
cacheEndOffset := session.consumedRecords[len(session.consumedRecords)-1].Offset
if startOffset >= cacheStartOffset && startOffset <= cacheEndOffset {
canUseCache = true
glog.V(2).Infof("[FETCH] Session for %s at offset %d, requested %d (backward seek), but offset is in cache [%d-%d]",
key, currentOffset, startOffset, cacheStartOffset, cacheEndOffset)
}
// CRITICAL FIX: Don't unlock here! Keep the write lock to prevent race condition
// where another thread creates a session at the wrong offset between our delete and create
// Fall through to session creation below while holding the lock
// bc.subscribersLock.Unlock() - REMOVED to fix race condition
}
session.mu.Unlock()
// Write lock is already held - skip to session creation
goto createSession
} else {
// Exact match - reuse
if canUseCache {
// Offset is in cache, reuse session
bc.subscribersLock.RUnlock()
return session, nil
}
// Not in cache - need to recreate session at the requested offset
glog.V(0).Infof("[FETCH] Recreating session for %s: session at %d, requested %d (backward seek, not in cache)",
key, currentOffset, startOffset)
bc.subscribersLock.RUnlock()
// Close and delete the old session
bc.subscribersLock.Lock()
// CRITICAL: Double-check if another thread already recreated the session at the desired offset
// This prevents multiple concurrent threads from all trying to recreate the same session
if existingSession, exists := bc.subscribers[key]; exists {
existingSession.mu.Lock()
existingOffset := existingSession.StartOffset
existingSession.mu.Unlock()
// Check if the session was already recreated at (or before) the requested offset
if existingOffset <= startOffset {
bc.subscribersLock.Unlock()
glog.V(1).Infof("[FETCH] Session already recreated by another thread at offset %d (requested %d)", existingOffset, startOffset)
// Re-acquire the existing session and continue
return existingSession, nil
}
// Session still needs recreation - close it
if existingSession.Stream != nil {
_ = existingSession.Stream.CloseSend()
}
if existingSession.Cancel != nil {
existingSession.Cancel()
}
delete(bc.subscribers, key)
}
// CRITICAL FIX: Don't unlock here! Keep the write lock to prevent race condition
// where another thread creates a session at the wrong offset between our delete and create
// Fall through to session creation below while holding the lock
// bc.subscribersLock.Unlock() - REMOVED to fix race condition
// Write lock is already held - skip to session creation
goto createSession
} else {
bc.subscribersLock.RUnlock()
}
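For backward seeks, the code above first checks whether the requested offset still falls inside the window of records the session has already consumed, and only recreates the stream when it does not. A minimal sketch of that window check, with simplified stand-in types:

package main

import "fmt"

// record is a simplified stand-in for a consumed record with its Kafka offset.
type record struct {
    Offset int64
}

// inConsumedWindow reports whether requested lies inside the contiguous range
// [first consumed offset, last consumed offset], i.e. the backward seek can be
// served from the session's cache instead of recreating the subscriber stream.
func inConsumedWindow(consumed []record, requested int64) bool {
    if len(consumed) == 0 {
        return false
    }
    start := consumed[0].Offset
    end := consumed[len(consumed)-1].Offset
    return requested >= start && requested <= end
}

func main() {
    cache := []record{{Offset: 90}, {Offset: 91}, {Offset: 92}}
    fmt.Println(inConsumedWindow(cache, 91)) // true: serve from cache
    fmt.Println(inConsumedWindow(cache, 10)) // false: recreate the stream at offset 10
}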
@@ -348,25 +353,28 @@ func (bc *BrokerClient) ReadRecordsFromOffset(ctx context.Context, session *Brok
key := session.Key()
session.mu.Unlock()
// CRITICAL FIX: Acquire the global lock FIRST, then re-check the session offset
// This prevents multiple threads from all deciding to recreate based on stale data
glog.V(1).Infof("[FETCH] Acquiring global lock to recreate session %s: requested=%d", key, requestedOffset)
bc.subscribersLock.Lock()
// CRITICAL FIX: Acquire the global lock FIRST, then re-check the session offset
// This prevents multiple threads from all deciding to recreate based on stale data
glog.V(0).Infof("[FETCH] 🔒 Thread acquiring global lock to recreate session %s: requested=%d", key, requestedOffset)
bc.subscribersLock.Lock()
glog.V(0).Infof("[FETCH] 🔓 Thread acquired global lock for session %s: requested=%d", key, requestedOffset)
// Double-check if another thread already recreated the session at the desired offset
// This prevents multiple concurrent threads from all trying to recreate the same session
if existingSession, exists := bc.subscribers[key]; exists {
existingSession.mu.Lock()
existingOffset := existingSession.StartOffset
existingSession.mu.Unlock()
// Double-check if another thread already recreated the session at the desired offset
// This prevents multiple concurrent threads from all trying to recreate the same session
if existingSession, exists := bc.subscribers[key]; exists {
existingSession.mu.Lock()
existingOffset := existingSession.StartOffset
existingSession.mu.Unlock()
// Check if the session was already recreated at (or before) the requested offset
if existingOffset <= requestedOffset {
bc.subscribersLock.Unlock()
glog.V(0).Infof("[FETCH] Session %s already recreated by another thread at offset %d (requested %d) - reusing", key, existingOffset, requestedOffset)
// Re-acquire the existing session and continue
return bc.ReadRecordsFromOffset(ctx, existingSession, requestedOffset, maxRecords)
}
// Check if the session was already recreated at (or before) the requested offset
if existingOffset <= requestedOffset {
bc.subscribersLock.Unlock()
glog.V(0).Infof("[FETCH] Session %s already recreated by another thread at offset %d (requested %d) - reusing", key, existingOffset, requestedOffset)
// Re-acquire the existing session and continue
return bc.ReadRecordsFromOffset(ctx, existingSession, requestedOffset, maxRecords)
}
glog.V(0).Infof("[FETCH] ⚠️ Session %s still at wrong offset %d (requested %d) - must recreate", key, existingOffset, requestedOffset)
// Session still needs recreation - close it
if existingSession.Stream != nil {
@@ -447,13 +455,13 @@ func (bc *BrokerClient) ReadRecordsFromOffset(ctx context.Context, session *Brok
Cancel: subscriberCancel,
}
bc.subscribers[key] = newSession
bc.subscribersLock.Unlock()
glog.V(0).Infof("[FETCH] ✓ Created fresh subscriber session for backward seek: %s at offset %d", key, requestedOffset)
bc.subscribers[key] = newSession
bc.subscribersLock.Unlock()
glog.V(0).Infof("[FETCH] ✓ Created fresh subscriber session for backward seek: %s at offset %d", key, requestedOffset)
// Read from fresh subscriber
glog.V(1).Infof("[FETCH] Reading from fresh subscriber %s at offset %d (maxRecords=%d)", key, requestedOffset, maxRecords)
return bc.ReadRecords(ctx, newSession, maxRecords)
// Read from fresh subscriber
glog.V(1).Infof("[FETCH] Reading from fresh subscriber %s at offset %d (maxRecords=%d)", key, requestedOffset, maxRecords)
return bc.ReadRecords(ctx, newSession, maxRecords)
}
// requestedOffset >= session.StartOffset: Keep reading forward from existing session
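The recreate path above is double-checked locking: after taking the global write lock, re-check the registry, because another fetch may already have recreated the session at or before the requested offset. A hedged sketch of that shape, with hypothetical registry and session types:

package main

import (
    "fmt"
    "sync"
)

type session struct{ startOffset int64 }

type registry struct {
    mu       sync.RWMutex
    sessions map[string]*session
}

// sessionAt returns a session positioned at or before requested, recreating it only
// if no other goroutine has already done so before we took the write lock.
func (r *registry) sessionAt(key string, requested int64) *session {
    r.mu.Lock()
    defer r.mu.Unlock()
    // Double-check under the write lock: another goroutine may have recreated it already.
    if s, ok := r.sessions[key]; ok && s.startOffset <= requested {
        return s
    }
    // Still stale (or missing): create a fresh session at the requested offset
    // while continuing to hold the lock, so no one can slip in a wrong-offset session.
    s := &session{startOffset: requested}
    r.sessions[key] = s
    return s
}

func main() {
    r := &registry{sessions: map[string]*session{"t-0": {startOffset: 100}}}
    fmt.Println(r.sessionAt("t-0", 150).startOffset) // 100: existing session can serve 150
    fmt.Println(r.sessionAt("t-0", 40).startOffset)  // 40: recreated for the backward seek
}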
@@ -645,6 +653,8 @@ func (bc *BrokerClient) ReadRecords(ctx context.Context, session *BrokerSubscrib
if result.err != nil {
glog.V(2).Infof("[FETCH] Stream.Recv() error after %d records: %v", len(records), result.err)
// Update session offset before returning
glog.V(0).Infof("[FETCH] 📍 Updating %s offset: %d → %d (error case, read %d records)",
session.Key(), session.StartOffset, currentOffset, len(records))
session.StartOffset = currentOffset
return records, nil
}
@@ -669,6 +679,8 @@ func (bc *BrokerClient) ReadRecords(ctx context.Context, session *BrokerSubscrib
// Timeout - return what we have
glog.V(4).Infof("[FETCH] Read timeout after %d records (waited %v), returning batch", len(records), time.Since(readStart))
// CRITICAL: Update session offset so next fetch knows where we left off
glog.V(0).Infof("[FETCH] 📍 Updating %s offset: %d → %d (timeout case, read %d records)",
session.Key(), session.StartOffset, currentOffset, len(records))
session.StartOffset = currentOffset
return records, nil
}
@@ -676,6 +688,8 @@ func (bc *BrokerClient) ReadRecords(ctx context.Context, session *BrokerSubscrib
glog.V(2).Infof("[FETCH] ReadRecords returning %d records (maxRecords reached)", len(records))
// Update session offset after successful read
glog.V(0).Infof("[FETCH] 📍 Updating %s offset: %d → %d (success case, read %d records)",
session.Key(), session.StartOffset, currentOffset, len(records))
session.StartOffset = currentOffset
// CRITICAL: Cache the consumed records to avoid broker tight loop
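Every exit path above (stream error, timeout, max records reached) advances session.StartOffset to the next unread offset and caches the consumed records, so the following fetch keeps reading forward instead of triggering a recreate. A simplified sketch of that bookkeeping, with invented types:

package main

import "fmt"

type rec struct{ Offset int64 }

type subscriberSession struct {
    StartOffset     int64
    consumedRecords []rec // cache kept so nearby backward seeks avoid a broker round-trip
}

// readBatch drains up to max records from next() and, on every exit path,
// records where the next read should resume and caches what was consumed.
func readBatch(s *subscriberSession, max int, next func() (rec, bool)) []rec {
    var out []rec
    current := s.StartOffset
    for len(out) < max {
        r, ok := next()
        if !ok { // timeout or stream error: return what we have
            break
        }
        out = append(out, r)
        current = r.Offset + 1
    }
    s.StartOffset = current // the next fetch continues forward from here
    s.consumedRecords = out
    return out
}

func main() {
    s := &subscriberSession{StartOffset: 10}
    src := []rec{{10}, {11}, {12}}
    i := 0
    batch := readBatch(s, 5, func() (rec, bool) {
        if i >= len(src) {
            return rec{}, false
        }
        r := src[i]
        i++
        return r, true
    })
    fmt.Println(len(batch), s.StartOffset) // 3 13
}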


@@ -5,6 +5,7 @@ import (
"fmt"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/mq/kafka/consumer"
)
@@ -151,16 +152,27 @@ func (h *Handler) handleOffsetCommit(correlationID uint32, apiVersion uint16, re
ConsumerGroupInstance: req.GroupInstanceID,
}
// Commit offset using SMQ storage (persistent to filer)
// Commit offset to both in-memory and SMQ storage
var errCode int16 = ErrorCodeNone
if generationMatches {
if err := h.commitOffsetToSMQ(key, p.Offset, p.Metadata); err != nil {
// Store in in-memory map first (works for both mock and SMQ backends)
if err := h.commitOffset(group, t.Name, p.Index, p.Offset, p.Metadata); err != nil {
errCode = ErrorCodeOffsetMetadataTooLarge
} else {
}
// Also store in SMQ persistent storage if available
if err := h.commitOffsetToSMQ(key, p.Offset, p.Metadata); err != nil {
// SMQ storage may not be available (e.g., in mock mode) - that's okay
glog.V(2).Infof("[OFFSET_COMMIT] SMQ storage not available: %v", err)
}
glog.V(0).Infof("[OFFSET_COMMIT] Committed: group=%s topic=%s partition=%d offset=%d",
req.GroupID, t.Name, p.Index, p.Offset)
} else {
// Do not store commit if generation mismatch
errCode = 22 // IllegalGeneration
glog.V(0).Infof("[OFFSET_COMMIT] Generation mismatch: group=%s expected=%d got=%d",
req.GroupID, group.Generation, req.GenerationID)
}
topicResp.Partitions = append(topicResp.Partitions, OffsetCommitPartitionResponse{
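The commit path now writes the offset to the in-memory group state first (which serves both the mock and SMQ backends) and then, best effort, to SMQ persistent storage. A hedged sketch of that dual-write order, using invented interfaces rather than the handler's real API:

package main

import (
    "errors"
    "fmt"
    "log"
)

// offsetStore is a hypothetical interface covering both backends.
type offsetStore interface {
    Commit(group, topic string, partition int32, offset int64) error
}

type memStore map[string]int64

func (m memStore) Commit(group, topic string, partition int32, offset int64) error {
    m[fmt.Sprintf("%s/%s/%d", group, topic, partition)] = offset
    return nil
}

type unavailableStore struct{}

func (unavailableStore) Commit(string, string, int32, int64) error {
    return errors.New("SMQ storage not available")
}

// commitOffset writes in-memory first (an error here fails the request),
// then persists best effort: a persistence failure is only logged, mirroring
// the handler's behavior when SMQ is absent (e.g. mock mode).
func commitOffset(mem, persistent offsetStore, group, topic string, partition int32, offset int64) error {
    if err := mem.Commit(group, topic, partition, offset); err != nil {
        return err
    }
    if err := persistent.Commit(group, topic, partition, offset); err != nil {
        log.Printf("persistent commit skipped: %v", err)
    }
    return nil
}

func main() {
    mem := memStore{}
    _ = commitOffset(mem, unavailableStore{}, "g1", "orders", 0, 42)
    fmt.Println(mem["g1/orders/0"]) // 42
}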
@@ -190,12 +202,15 @@ func (h *Handler) handleOffsetFetch(correlationID uint32, apiVersion uint16, req
// Get consumer group
group := h.groupCoordinator.GetGroup(request.GroupID)
if group == nil {
glog.V(0).Infof("[OFFSET_FETCH] Group not found: %s", request.GroupID)
return h.buildOffsetFetchErrorResponse(correlationID, ErrorCodeInvalidGroupID), nil
}
group.Mu.RLock()
defer group.Mu.RUnlock()
glog.V(0).Infof("[OFFSET_FETCH] Request: group=%s topics=%v", request.GroupID, request.Topics)
// Build response
response := OffsetFetchResponse{
CorrelationID: correlationID,
@@ -222,25 +237,35 @@ func (h *Handler) handleOffsetFetch(correlationID uint32, apiVersion uint16, req
// Fetch offsets for requested partitions
for _, partition := range partitionsToFetch {
// Create consumer offset key for SMQ storage
key := ConsumerOffsetKey{
Topic: topic.Name,
Partition: partition,
ConsumerGroup: request.GroupID,
ConsumerGroupInstance: request.GroupInstanceID,
}
var fetchedOffset int64 = -1
var metadata string = ""
var errorCode int16 = ErrorCodeNone
// Fetch offset directly from SMQ storage (persistent storage)
// No cache needed - offset fetching is infrequent compared to commits
if off, meta, err := h.fetchOffsetFromSMQ(key); err == nil && off >= 0 {
// Try fetching from in-memory cache first (works for both mock and SMQ backends)
if off, meta, err := h.fetchOffset(group, topic.Name, partition); err == nil && off >= 0 {
fetchedOffset = off
metadata = meta
glog.V(0).Infof("[OFFSET_FETCH] Found in memory: group=%s topic=%s partition=%d offset=%d",
request.GroupID, topic.Name, partition, off)
} else {
// No offset found in persistent storage (-1 indicates no committed offset)
// Fallback: try fetching from SMQ persistent storage
// This handles cases where offsets are stored in SMQ but not yet loaded into memory
key := ConsumerOffsetKey{
Topic: topic.Name,
Partition: partition,
ConsumerGroup: request.GroupID,
ConsumerGroupInstance: request.GroupInstanceID,
}
if off, meta, err := h.fetchOffsetFromSMQ(key); err == nil && off >= 0 {
fetchedOffset = off
metadata = meta
glog.V(0).Infof("[OFFSET_FETCH] Found in SMQ: group=%s topic=%s partition=%d offset=%d",
request.GroupID, topic.Name, partition, off)
} else {
glog.V(0).Infof("[OFFSET_FETCH] No offset found: group=%s topic=%s partition=%d",
request.GroupID, topic.Name, partition)
}
// No offset found in either location (-1 indicates no committed offset)
}
partitionResponse := OffsetFetchPartitionResponse{
@@ -250,6 +275,8 @@ func (h *Handler) handleOffsetFetch(correlationID uint32, apiVersion uint16, req
Metadata: metadata,
ErrorCode: errorCode,
}
glog.V(0).Infof("[OFFSET_FETCH] Returning: group=%s topic=%s partition=%d offset=%d",
request.GroupID, topic.Name, partition, fetchedOffset)
topicResponse.Partitions = append(topicResponse.Partitions, partitionResponse)
}
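Offset fetch mirrors the commit path in reverse: consult the in-memory group state first, fall back to SMQ persistent storage, and report -1 when neither side has a committed offset (Kafka clients then apply auto.offset.reset). A minimal sketch of that lookup order, with hypothetical function types:

package main

import (
    "errors"
    "fmt"
)

// lookup is a hypothetical signature for either backend:
// it returns the committed offset and metadata, or an error / -1 when absent.
type lookup func(group, topic string, partition int32) (int64, string, error)

// fetchCommitted tries the in-memory store first, then the persistent store.
// -1 means "no committed offset for this group/topic/partition".
func fetchCommitted(mem, persistent lookup, group, topic string, partition int32) (int64, string) {
    if off, meta, err := mem(group, topic, partition); err == nil && off >= 0 {
        return off, meta
    }
    if off, meta, err := persistent(group, topic, partition); err == nil && off >= 0 {
        return off, meta
    }
    return -1, ""
}

func main() {
    miss := func(string, string, int32) (int64, string, error) { return -1, "", errors.New("not found") }
    hit := func(string, string, int32) (int64, string, error) { return 7, "", nil }
    off, _ := fetchCommitted(miss, hit, "g1", "orders", 0)
    fmt.Println(off) // 7: served from the persistent fallback
}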