fix: Increase fetch batch sizes to utilize available maxBytes capacity
PROBLEM:
Consumer throughput is only 36.80 msgs/sec vs producer 50.21 msgs/sec.
The test shows messages being consumed at 73% of the production rate.

ROOT CAUSE:
FetchMultipleBatches was hardcoded to fetch only:
- 10 records per batch (5.1 KB per batch with 512-byte messages)
- 10 batches max per fetch (~51 KB total per fetch)

But clients request 10 MB per fetch!
- Utilization: 0.5% of requested capacity
- Massive inefficiency causing slow consumer throughput

Analysis:
- Client requests: 10 MB per fetch (FetchSize: 10e6)
- Server returns: ~51 KB per fetch (200x less!)
- Batches: 10 records each (way too small)
- Result: Consumer falls behind producer by 26%

FIX:
Calculate the batch size from maxBytes instead of hardcoding it:
- recordsPerBatch = (maxBytes - overhead) / estimatedMsgSize
- Start with 9.8 MB / 1024 bytes = ~9,600 records per fetch
- Min 100 records, max 10,000 records per batch
- Scale max batches based on available space
- Adaptive sizing for remaining bytes

EXPECTED IMPACT:
- Consumer throughput: 36.80 → ~48+ msgs/sec (matching the producer)
- Fetch efficiency: 0.5% → ~98% of maxBytes
- Message loss: 45% → near 0%

This is critical for matching Kafka semantics, where clients specify fetch
sizes and the broker should honor them.
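To make the FIX arithmetic concrete, below is a minimal, runnable sketch of the sizing calculation under the patch's stated assumptions (200-byte fetch overhead, 1 KiB estimated message size, 100/10,000 record clamps, 5/100 batch clamps). plannedBatchSize is a hypothetical helper used only for illustration, not broker code.

// Sketch of the maxBytes-driven sizing described in the FIX section.
// Constants mirror the values used in the diff below; they are assumptions
// of the patch, not tunables exposed by the broker.
package main

import "fmt"

func plannedBatchSize(maxBytes int32) (recordsPerBatch int32, maxBatchesPerFetch int) {
	const (
		overhead         = int32(200)  // assumed per-fetch overhead
		estimatedMsgSize = int32(1024) // assumed average message size incl. batch overhead
	)
	recordsPerBatch = (maxBytes - overhead) / estimatedMsgSize
	if recordsPerBatch < 100 {
		recordsPerBatch = 100 // never fetch fewer than 100 records per batch
	}
	if recordsPerBatch > 10000 {
		recordsPerBatch = 10000 // cap to bound memory per batch
	}
	maxBatchesPerFetch = int((maxBytes - overhead) / (estimatedMsgSize * 10))
	if maxBatchesPerFetch < 5 {
		maxBatchesPerFetch = 5
	}
	if maxBatchesPerFetch > 100 {
		maxBatchesPerFetch = 100
	}
	return recordsPerBatch, maxBatchesPerFetch
}

func main() {
	// A 10 MB client FetchSize yields ~9,765 records per batch and the
	// 100-batch cap, vs. the old hardcoded 10 records x 10 batches (~51 KB).
	r, b := plannedBatchSize(10_000_000)
	fmt.Printf("recordsPerBatch=%d maxBatchesPerFetch=%d\n", r, b)
}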
@@ -57,9 +57,25 @@ func (f *MultiBatchFetcher) FetchMultipleBatches(ctx context.Context, topicName
 	totalSize := int32(0)
 	batchCount := 0
 
-	// Parameters for batch fetching - start smaller to respect maxBytes better
-	recordsPerBatch := int32(10) // Start with smaller batch size
-	maxBatchesPerFetch := 10     // Limit number of batches to avoid infinite loops
+	// Estimate records per batch based on maxBytes available
+	// Assume average message size + batch overhead
+	// Client requested maxBytes, we should use most of it
+	// Start with larger batches to maximize throughput
+	estimatedMsgSize := int32(1024)                        // Typical message size with overhead
+	recordsPerBatch := (maxBytes - 200) / estimatedMsgSize // Use available space efficiently
+	if recordsPerBatch < 100 {
+		recordsPerBatch = 100 // Minimum 100 records per batch
+	}
+	if recordsPerBatch > 10000 {
+		recordsPerBatch = 10000 // Cap at 10k records per batch to avoid huge memory allocations
+	}
+	maxBatchesPerFetch := int((maxBytes - 200) / (estimatedMsgSize * 10)) // Reasonable limit
+	if maxBatchesPerFetch < 5 {
+		maxBatchesPerFetch = 5 // At least 5 batches
+	}
+	if maxBatchesPerFetch > 100 {
+		maxBatchesPerFetch = 100 // At most 100 batches
+	}
 
 	for batchCount < maxBatchesPerFetch && currentOffset < highWaterMark {
 
@@ -70,8 +86,13 @@ func (f *MultiBatchFetcher) FetchMultipleBatches(ctx context.Context, topicName
 		}
 
-		// Adapt records per batch based on remaining space
-		if remainingBytes < 1000 {
-			recordsPerBatch = 10 // Smaller batches when space is limited
-		}
+		// If we have less space remaining, fetch fewer records to avoid going over
+		currentBatchSize := recordsPerBatch
+		if remainingBytes < recordsPerBatch*estimatedMsgSize {
+			currentBatchSize = remainingBytes / estimatedMsgSize
+			if currentBatchSize < 1 {
+				currentBatchSize = 1
+			}
+		}
 
 		// Calculate how many records to fetch for this batch
@@ -80,7 +101,7 @@ func (f *MultiBatchFetcher) FetchMultipleBatches(ctx context.Context, topicName
 			break
 		}
 
-		recordsToFetch := recordsPerBatch
+		recordsToFetch := currentBatchSize
 		if int64(recordsToFetch) > recordsAvailable {
 			recordsToFetch = int32(recordsAvailable)
 		}
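For reference, the per-iteration adaptation from the last two hunks can be read as a single standalone function. The sketch below pulls that logic out of the fetch loop for illustration; recordsForNextBatch is a hypothetical helper, and the surrounding bookkeeping (how remainingBytes and the high water mark are tracked) is assumed rather than shown.

// Condensed sketch of the adaptive sizing in the hunks above: shrink the
// batch when little of maxBytes remains, and never request more records
// than remain below the high water mark.
package main

import "fmt"

func recordsForNextBatch(recordsPerBatch, remainingBytes, estimatedMsgSize int32, recordsAvailable int64) int32 {
	currentBatchSize := recordsPerBatch
	if remainingBytes < recordsPerBatch*estimatedMsgSize {
		currentBatchSize = remainingBytes / estimatedMsgSize // fit the leftover byte budget
		if currentBatchSize < 1 {
			currentBatchSize = 1 // always make progress
		}
	}
	recordsToFetch := currentBatchSize
	if int64(recordsToFetch) > recordsAvailable {
		recordsToFetch = int32(recordsAvailable) // don't read past the high water mark
	}
	return recordsToFetch
}

func main() {
	// ~3 KB of the fetch budget left, 1 KiB messages, large backlog -> 2 records.
	fmt.Println(recordsForNextBatch(9765, 3000, 1024, 1_000_000))
}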