combine parquet results with live logs
@@ -1376,11 +1376,18 @@ func (e *SQLEngine) tryFastParquetAggregation(ctx context.Context, hybridScanner
	}

	// Get all partitions for this topic
-	partitions, err := e.discoverTopicPartitions(hybridScanner.topic.Namespace, hybridScanner.topic.Name)
+	relativePartitions, err := e.discoverTopicPartitions(hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	if err != nil {
		return nil, false
	}

	// Convert relative partition paths to full paths
	topicBasePath := fmt.Sprintf("/topics/%s/%s", hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	partitions := make([]string, len(relativePartitions))
	for i, relPartition := range relativePartitions {
		partitions[i] = fmt.Sprintf("%s/%s", topicBasePath, relPartition)
	}

	// Collect statistics from all partitions (both parquet and live logs)
	allFileStats := make(map[string][]*ParquetFileStats) // partitionPath -> parquet file stats
	totalParquetRowCount := int64(0)
@@ -1448,10 +1455,12 @@ func (e *SQLEngine) tryFastParquetAggregation(ctx context.Context, hybridScanner
			}

		case "MIN":
			// Find global minimum across all files and partitions
			// Hybrid approach: combine parquet statistics with live log scanning
			var globalMin interface{}
			var globalMinValue *schema_pb.Value
			hasParquetStats := false

			// Step 1: Get minimum from parquet statistics
			for _, fileStats := range allFileStats {
				for _, fileStat := range fileStats {
					if colStats, exists := fileStat.ColumnStats[spec.Column]; exists {
@@ -1459,22 +1468,55 @@ func (e *SQLEngine) tryFastParquetAggregation(ctx context.Context, hybridScanner
							globalMinValue = colStats.MinValue
							globalMin = e.extractRawValue(colStats.MinValue)
						}
						hasParquetStats = true
					}
				}
			}

			// Handle system columns that aren't in parquet column stats
			if globalMin == nil {
				// Step 2: Get minimum from live log data in each partition
				for _, partition := range partitions {
					// Get parquet source files for this partition (for deduplication)
					partitionParquetSources := make(map[string]bool)
					if partitionFileStats, exists := allFileStats[partition]; exists {
						partitionParquetSources = e.extractParquetSourceFiles(partitionFileStats)
					}

					// Scan live log files for MIN value
					liveLogMin, _, err := e.computeLiveLogMinMax(partition, spec.Column, partitionParquetSources)
					if err != nil {
						fmt.Printf("Warning: failed to compute live log min for partition %s: %v\n", partition, err)
						continue
					}

					// Update global minimum if live log has a smaller value
					if liveLogMin != nil {
						if globalMin == nil {
							globalMin = liveLogMin
						} else {
							// Compare live log min with current global min
							liveLogSchemaValue := e.convertRawValueToSchemaValue(liveLogMin)
							if e.compareValues(liveLogSchemaValue, globalMinValue) < 0 {
								globalMin = liveLogMin
								globalMinValue = liveLogSchemaValue
							}
						}
					}
				}
			}

			// Step 3: Handle system columns that aren't in parquet column stats
			if globalMin == nil && !hasParquetStats {
				globalMin = e.getSystemColumnGlobalMin(spec.Column, allFileStats)
			}

			aggResults[i].Min = globalMin

		case "MAX":
			// Find global maximum across all files and partitions
			// Hybrid approach: combine parquet statistics with live log scanning
			var globalMax interface{}
			var globalMaxValue *schema_pb.Value
			hasParquetStats := false

			// Step 1: Get maximum from parquet statistics
			for _, fileStats := range allFileStats {
				for _, fileStat := range fileStats {
					if colStats, exists := fileStat.ColumnStats[spec.Column]; exists {
@@ -1482,12 +1524,43 @@ func (e *SQLEngine) tryFastParquetAggregation(ctx context.Context, hybridScanner
							globalMaxValue = colStats.MaxValue
							globalMax = e.extractRawValue(colStats.MaxValue)
						}
						hasParquetStats = true
					}
				}
			}

			// Handle system columns that aren't in parquet column stats
			if globalMax == nil {
				// Step 2: Get maximum from live log data in each partition
				for _, partition := range partitions {
					// Get parquet source files for this partition (for deduplication)
					partitionParquetSources := make(map[string]bool)
					if partitionFileStats, exists := allFileStats[partition]; exists {
						partitionParquetSources = e.extractParquetSourceFiles(partitionFileStats)
					}

					// Scan live log files for MAX value
					_, liveLogMax, err := e.computeLiveLogMinMax(partition, spec.Column, partitionParquetSources)
					if err != nil {
						fmt.Printf("Warning: failed to compute live log max for partition %s: %v\n", partition, err)
						continue
					}

					// Update global maximum if live log has a larger value
					if liveLogMax != nil {
						if globalMax == nil {
							globalMax = liveLogMax
						} else {
							// Compare live log max with current global max
							liveLogSchemaValue := e.convertRawValueToSchemaValue(liveLogMax)
							if e.compareValues(liveLogSchemaValue, globalMaxValue) > 0 {
								globalMax = liveLogMax
								globalMaxValue = liveLogSchemaValue
							}
						}
					}
				}
			}

			// Step 3: Handle system columns that aren't in parquet column stats
			if globalMax == nil && !hasParquetStats {
				globalMax = e.getSystemColumnGlobalMax(spec.Column, allFileStats)
			}

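Aside: the MIN/MAX branches above merge two partial answers — the extremum read from parquet column statistics and the extremum found by scanning live log files that have not been compacted yet — and keep whichever wins the comparison. A minimal standalone sketch of that merge rule on plain int64 values (hypothetical names, not the engine's types):

package main

import "fmt"

// mergeMin combines a minimum derived from parquet column statistics with a
// minimum found by scanning live (not yet compacted) log entries. Either side
// may be absent, signalled by the ok flags. Illustrative sketch only.
func mergeMin(parquetMin int64, hasParquet bool, liveMin int64, hasLive bool) (int64, bool) {
	switch {
	case hasParquet && hasLive:
		if liveMin < parquetMin {
			return liveMin, true
		}
		return parquetMin, true
	case hasParquet:
		return parquetMin, true
	case hasLive:
		return liveMin, true
	default:
		return 0, false // no data at all
	}
}

func main() {
	// Parquet stats say the smallest flushed value is 42, but a live log
	// entry holding 7 has not been converted to parquet yet.
	min, ok := mergeMin(42, true, 7, true)
	fmt.Println(min, ok) // 7 true
}
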
@@ -1516,6 +1589,283 @@ func (e *SQLEngine) tryFastParquetAggregation(ctx context.Context, hybridScanner
	return result, true
}

// computeLiveLogMinMax scans live log files to find MIN/MAX values for a specific column
func (e *SQLEngine) computeLiveLogMinMax(partitionPath string, columnName string, parquetSourceFiles map[string]bool) (interface{}, interface{}, error) {
	if e.catalog.brokerClient == nil {
		return nil, nil, fmt.Errorf("no broker client available")
	}

	filerClient, err := e.catalog.brokerClient.GetFilerClient()
	if err != nil {
		return nil, nil, fmt.Errorf("failed to get filer client: %v", err)
	}

	var minValue, maxValue interface{}
	var minSchemaValue, maxSchemaValue *schema_pb.Value

	// Process each live log file
	err = filer_pb.ReadDirAllEntries(context.Background(), filerClient, util.FullPath(partitionPath), "", func(entry *filer_pb.Entry, isLast bool) error {
		// Skip parquet files and directories
		if entry.IsDirectory || strings.HasSuffix(entry.Name, ".parquet") {
			return nil
		}
		// Skip files that have been converted to parquet (deduplication)
		if parquetSourceFiles[entry.Name] {
			return nil
		}

		filePath := partitionPath + "/" + entry.Name

		// Scan this log file for MIN/MAX values
		fileMin, fileMax, err := e.computeFileMinMax(filerClient, filePath, columnName)
		if err != nil {
			fmt.Printf("Warning: failed to compute min/max for file %s: %v\n", filePath, err)
			return nil // Continue with other files
		}

		// Update global min/max
		if fileMin != nil {
			if minSchemaValue == nil || e.compareValues(fileMin, minSchemaValue) < 0 {
				minSchemaValue = fileMin
				minValue = e.extractRawValue(fileMin)
			}
		}

		if fileMax != nil {
			if maxSchemaValue == nil || e.compareValues(fileMax, maxSchemaValue) > 0 {
				maxSchemaValue = fileMax
				maxValue = e.extractRawValue(fileMax)
			}
		}

		return nil
	})

	if err != nil {
		return nil, nil, fmt.Errorf("failed to process partition directory %s: %v", partitionPath, err)
	}

	return minValue, maxValue, nil
}

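Aside: the directory walk above only scans entries that are still "live": it skips directories, *.parquet files, and any log file whose name is listed as a parquet source file, so rows that were already compacted into parquet are not counted twice. A standalone sketch of that filtering rule on a plain list of names (hypothetical helper, not the filer API):

package main

import (
	"fmt"
	"strings"
)

// liveLogFiles returns the entries that should still be scanned directly:
// not directories, not parquet files, and not log files already converted
// into a parquet file (tracked in parquetSources). Illustrative only.
func liveLogFiles(names []string, isDir map[string]bool, parquetSources map[string]bool) []string {
	var out []string
	for _, name := range names {
		if isDir[name] || strings.HasSuffix(name, ".parquet") {
			continue
		}
		if parquetSources[name] {
			continue // already represented by parquet statistics
		}
		out = append(out, name)
	}
	return out
}

func main() {
	names := []string{"2025-01-01-00", "2025-01-01-01", "data.parquet", "checkpoints"}
	isDir := map[string]bool{"checkpoints": true}
	parquetSources := map[string]bool{"2025-01-01-00": true} // compacted into data.parquet
	fmt.Println(liveLogFiles(names, isDir, parquetSources)) // [2025-01-01-01]
}
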
// computeFileMinMax scans a single log file to find MIN/MAX values for a specific column
func (e *SQLEngine) computeFileMinMax(filerClient filer_pb.FilerClient, filePath string, columnName string) (*schema_pb.Value, *schema_pb.Value, error) {
	var minValue, maxValue *schema_pb.Value

	err := e.eachLogEntryInFile(filerClient, filePath, func(logEntry *filer_pb.LogEntry) error {
		// Convert log entry to record value
		recordValue, _, err := e.convertLogEntryToRecordValue(logEntry)
		if err != nil {
			return err // This will stop processing this file but not fail the overall query
		}

		// Extract the requested column value
		var columnValue *schema_pb.Value
		if e.isSystemColumn(columnName) {
			// Handle system columns
			switch strings.ToLower(columnName) {
			case "_timestamp_ns", "timestamp_ns":
				columnValue = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs}}
			case "_key", "key":
				columnValue = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: logEntry.Key}}
			case "_source", "source":
				columnValue = &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: "live_log"}}
			}
		} else {
			// Handle regular data columns
			if value, exists := recordValue.Fields[columnName]; exists {
				columnValue = value
			}
		}

		if columnValue == nil {
			return nil // Skip this record
		}

		// Update min/max
		if minValue == nil || e.compareValues(columnValue, minValue) < 0 {
			minValue = columnValue
		}
		if maxValue == nil || e.compareValues(columnValue, maxValue) > 0 {
			maxValue = columnValue
		}

		return nil
	})

	return minValue, maxValue, err
}

// eachLogEntryInFile reads a log file and calls the provided function for each log entry
func (e *SQLEngine) eachLogEntryInFile(filerClient filer_pb.FilerClient, filePath string, fn func(*filer_pb.LogEntry) error) error {
	// Extract directory and filename
	// filePath is like "partitionPath/filename"
	lastSlash := strings.LastIndex(filePath, "/")
	if lastSlash == -1 {
		return fmt.Errorf("invalid file path: %s", filePath)
	}

	dirPath := filePath[:lastSlash]
	fileName := filePath[lastSlash+1:]

	// Get file entry
	var fileEntry *filer_pb.Entry
	err := filer_pb.ReadDirAllEntries(context.Background(), filerClient, util.FullPath(dirPath), "", func(entry *filer_pb.Entry, isLast bool) error {
		if entry.Name == fileName {
			fileEntry = entry
		}
		return nil
	})

	if err != nil {
		return fmt.Errorf("failed to find file %s: %v", filePath, err)
	}

	if fileEntry == nil {
		return fmt.Errorf("file not found: %s", filePath)
	}

	lookupFileIdFn := filer.LookupFn(filerClient)

	// eachChunkFn processes each chunk's data (pattern from countRowsInLogFile)
	eachChunkFn := func(buf []byte) error {
		for pos := 0; pos+4 < len(buf); {
			size := util.BytesToUint32(buf[pos : pos+4])
			if pos+4+int(size) > len(buf) {
				break
			}

			entryData := buf[pos+4 : pos+4+int(size)]

			logEntry := &filer_pb.LogEntry{}
			if err := proto.Unmarshal(entryData, logEntry); err != nil {
				pos += 4 + int(size)
				continue // Skip corrupted entries
			}

			// Call the provided function for each log entry
			if err := fn(logEntry); err != nil {
				return err
			}

			pos += 4 + int(size)
		}
		return nil
	}

	// Read file chunks and process them (pattern from countRowsInLogFile)
	fileSize := filer.FileSize(fileEntry)
	visibleIntervals, _ := filer.NonOverlappingVisibleIntervals(context.Background(), lookupFileIdFn, fileEntry.Chunks, 0, int64(fileSize))
	chunkViews := filer.ViewFromVisibleIntervals(visibleIntervals, 0, int64(fileSize))

	for x := chunkViews.Front(); x != nil; x = x.Next {
		chunk := x.Value
		urlStrings, err := lookupFileIdFn(context.Background(), chunk.FileId)
		if err != nil {
			fmt.Printf("Warning: failed to lookup chunk %s: %v\n", chunk.FileId, err)
			continue
		}

		if len(urlStrings) == 0 {
			continue
		}

		// Read chunk data
		// urlStrings[0] is already a complete URL (http://server:port/fileId)
		data, _, err := util_http.Get(urlStrings[0])
		if err != nil {
			fmt.Printf("Warning: failed to read chunk %s from %s: %v\n", chunk.FileId, urlStrings[0], err)
			continue
		}

		// Process this chunk
		if err := eachChunkFn(data); err != nil {
			return err
		}
	}

	return nil
}

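Aside: eachChunkFn walks a buffer of length-prefixed records — a 4-byte size read with util.BytesToUint32 followed by a protobuf-encoded LogEntry, repeated until the buffer ends. A standalone sketch of that framing using only the standard library, assuming the 4-byte prefix is big-endian and substituting a plain byte payload for the protobuf message:

package main

import (
	"encoding/binary"
	"fmt"
)

// appendRecord frames one payload as <4-byte big-endian length><payload>.
func appendRecord(buf, payload []byte) []byte {
	var size [4]byte
	binary.BigEndian.PutUint32(size[:], uint32(len(payload)))
	return append(append(buf, size[:]...), payload...)
}

// eachRecord walks the framed buffer and calls fn for each payload,
// mirroring the pos/size loop in eachChunkFn above.
func eachRecord(buf []byte, fn func(payload []byte) error) error {
	for pos := 0; pos+4 < len(buf); {
		size := binary.BigEndian.Uint32(buf[pos : pos+4])
		if pos+4+int(size) > len(buf) {
			break // truncated tail, stop cleanly
		}
		if err := fn(buf[pos+4 : pos+4+int(size)]); err != nil {
			return err
		}
		pos += 4 + int(size)
	}
	return nil
}

func main() {
	var buf []byte
	buf = appendRecord(buf, []byte("entry-1"))
	buf = appendRecord(buf, []byte("entry-2"))
	_ = eachRecord(buf, func(p []byte) error {
		fmt.Println(string(p))
		return nil
	})
}
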
// convertLogEntryToRecordValue helper method (reuse existing logic)
func (e *SQLEngine) convertLogEntryToRecordValue(logEntry *filer_pb.LogEntry) (*schema_pb.RecordValue, string, error) {
	// Parse the log entry data as JSON
	var jsonData map[string]interface{}
	if err := json.Unmarshal(logEntry.Data, &jsonData); err != nil {
		return nil, "", fmt.Errorf("failed to parse log entry JSON: %v", err)
	}

	// Create record value with system and user fields
	recordValue := &schema_pb.RecordValue{Fields: make(map[string]*schema_pb.Value)}

	// Add system columns
	recordValue.Fields[SW_COLUMN_NAME_TS] = &schema_pb.Value{
		Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs},
	}
	recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{
		Kind: &schema_pb.Value_BytesValue{BytesValue: logEntry.Key},
	}

	// Add user data fields
	for fieldName, jsonValue := range jsonData {
		if fieldName == SW_COLUMN_NAME_TS || fieldName == SW_COLUMN_NAME_KEY {
			continue // Skip system fields in user data
		}

		// Convert JSON value to schema value (basic conversion)
		schemaValue := e.convertJSONValueToSchemaValue(jsonValue)
		if schemaValue != nil {
			recordValue.Fields[fieldName] = schemaValue
		}
	}

	return recordValue, "live_log", nil
}

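Aside: the conversion above merges two sources into one record — system columns taken from the log entry envelope (timestamp, key) and user columns decoded from the JSON payload — with the reserved system names winning on collision. A standalone sketch of that merge with plain string-keyed maps (the "_ts"/"_key" names are illustrative stand-ins, not the SW_COLUMN_NAME_* constants):

package main

import (
	"encoding/json"
	"fmt"
)

// buildRecord combines envelope metadata with decoded payload fields.
// Payload fields that reuse a reserved name are skipped, mirroring the
// collision rule above. Sketch only.
func buildRecord(tsNs int64, key []byte, payload []byte) (map[string]interface{}, error) {
	var userFields map[string]interface{}
	if err := json.Unmarshal(payload, &userFields); err != nil {
		return nil, fmt.Errorf("parse payload: %w", err)
	}

	record := map[string]interface{}{
		"_ts":  tsNs, // reserved system columns (names are illustrative)
		"_key": string(key),
	}
	for name, value := range userFields {
		if name == "_ts" || name == "_key" {
			continue // don't let payload data override system columns
		}
		record[name] = value
	}
	return record, nil
}

func main() {
	rec, _ := buildRecord(1700000000000000000, []byte("user-42"), []byte(`{"action":"login","_ts":"spoofed"}`))
	fmt.Println(rec["_ts"], rec["_key"], rec["action"]) // 1700000000000000000 user-42 login
}
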
// convertJSONValueToSchemaValue converts JSON values to schema_pb.Value
func (e *SQLEngine) convertJSONValueToSchemaValue(jsonValue interface{}) *schema_pb.Value {
	switch v := jsonValue.(type) {
	case string:
		return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: v}}
	case float64:
		// JSON numbers are always float64, so detect whether the value is actually an integer
		if v == float64(int64(v)) {
			return &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: int64(v)}}
		}
		return &schema_pb.Value{Kind: &schema_pb.Value_DoubleValue{DoubleValue: v}}
	case bool:
		return &schema_pb.Value{Kind: &schema_pb.Value_BoolValue{BoolValue: v}}
	case nil:
		return nil
	default:
		// Convert other types to string
		return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: fmt.Sprintf("%v", v)}}
	}
}

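Aside: encoding/json unmarshals every JSON number into float64, which is why the converter above checks whether the value round-trips through int64 before choosing Int64Value over DoubleValue. A small standalone illustration of that detection (not the engine code):

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	var fields map[string]interface{}
	_ = json.Unmarshal([]byte(`{"count": 3, "ratio": 0.5}`), &fields)

	for name, v := range fields {
		f, ok := v.(float64) // json.Unmarshal yields float64 for all JSON numbers
		if !ok {
			continue
		}
		if f == float64(int64(f)) {
			fmt.Printf("%s -> int64(%d)\n", name, int64(f)) // count -> int64(3)
		} else {
			fmt.Printf("%s -> float64(%g)\n", name, f) // ratio -> float64(0.5)
		}
	}
}

Note that integers above 2^53 cannot be represented exactly in float64, so very large integer IDs can lose precision under this heuristic.
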
// convertRawValueToSchemaValue converts raw Go values back to schema_pb.Value for comparison
func (e *SQLEngine) convertRawValueToSchemaValue(rawValue interface{}) *schema_pb.Value {
	switch v := rawValue.(type) {
	case int32:
		return &schema_pb.Value{Kind: &schema_pb.Value_Int32Value{Int32Value: v}}
	case int64:
		return &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: v}}
	case float32:
		return &schema_pb.Value{Kind: &schema_pb.Value_FloatValue{FloatValue: v}}
	case float64:
		return &schema_pb.Value{Kind: &schema_pb.Value_DoubleValue{DoubleValue: v}}
	case string:
		return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: v}}
	case bool:
		return &schema_pb.Value{Kind: &schema_pb.Value_BoolValue{BoolValue: v}}
	case []byte:
		return &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: v}}
	default:
		// Convert other types to string as fallback
		return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: fmt.Sprintf("%v", v)}}
	}
}

// canUseParquetStatsForAggregation determines if an aggregation can be optimized with parquet stats
func (e *SQLEngine) canUseParquetStatsForAggregation(spec AggregationSpec) bool {
	switch spec.Function {