Mirror of https://github.com/seaweedfs/seaweedfs.git
refactor
weed/query/engine/aggregations.go (new file, 551 lines)
@@ -0,0 +1,551 @@
package engine

import (
	"context"
	"fmt"
	"strings"

	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
	"github.com/seaweedfs/seaweedfs/weed/query/sqltypes"
	"github.com/xwb1989/sqlparser"
)

// AggregationSpec defines an aggregation function to be computed
type AggregationSpec struct {
	Function string // COUNT, SUM, AVG, MIN, MAX
	Column   string // Column name, or "*" for COUNT(*)
	Alias    string // Optional alias for the result column
	Distinct bool   // Support for DISTINCT keyword
}
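
// Illustrative example (an assumption added for clarity, not part of the original commit):
// for a query such as
//
//	SELECT COUNT(*) AS total FROM my_topic
//
// the resulting spec would look roughly like
//
//	AggregationSpec{Function: "COUNT", Column: "*", Alias: "total", Distinct: false}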

// AggregationResult holds the computed result of an aggregation
type AggregationResult struct {
	Count int64
	Sum   float64
	Min   interface{}
	Max   interface{}
}

// AggregationStrategy represents the strategy for executing aggregations
type AggregationStrategy struct {
	CanUseFastPath   bool
	Reason           string
	UnsupportedSpecs []AggregationSpec
}

// TopicDataSources represents the data sources available for a topic
type TopicDataSources struct {
	ParquetFiles    map[string][]*ParquetFileStats // partitionPath -> parquet file stats
	ParquetRowCount int64
	LiveLogRowCount int64
	PartitionsCount int
}

// FastPathOptimizer handles fast path aggregation optimization decisions
type FastPathOptimizer struct {
	engine *SQLEngine
}

// NewFastPathOptimizer creates a new fast path optimizer
func NewFastPathOptimizer(engine *SQLEngine) *FastPathOptimizer {
	return &FastPathOptimizer{engine: engine}
}

// DetermineStrategy analyzes aggregations and determines if fast path can be used
func (opt *FastPathOptimizer) DetermineStrategy(aggregations []AggregationSpec) AggregationStrategy {
	strategy := AggregationStrategy{
		CanUseFastPath:   true,
		Reason:           "all_aggregations_supported",
		UnsupportedSpecs: []AggregationSpec{},
	}

	for _, spec := range aggregations {
		if !opt.engine.canUseParquetStatsForAggregation(spec) {
			strategy.CanUseFastPath = false
			strategy.Reason = "unsupported_aggregation_functions"
			strategy.UnsupportedSpecs = append(strategy.UnsupportedSpecs, spec)
		}
	}

	return strategy
}
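
// Minimal usage sketch (assumption; mirrors how tryFastParquetAggregation below drives the optimizer):
//
//	optimizer := NewFastPathOptimizer(engine)
//	strategy := optimizer.DetermineStrategy(aggregations)
//	if !strategy.CanUseFastPath {
//		// fall back to a full scan; strategy.Reason and strategy.UnsupportedSpecs explain why
//	}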

// CollectDataSources gathers information about available data sources for a topic
func (opt *FastPathOptimizer) CollectDataSources(ctx context.Context, hybridScanner *HybridMessageScanner) (*TopicDataSources, error) {
	dataSources := &TopicDataSources{
		ParquetFiles:    make(map[string][]*ParquetFileStats),
		ParquetRowCount: 0,
		LiveLogRowCount: 0,
		PartitionsCount: 0,
	}

	// Discover partitions for the topic
	relativePartitions, err := opt.engine.discoverTopicPartitions(hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	if err != nil {
		return dataSources, DataSourceError{
			Source: "partition_discovery",
			Cause:  err,
		}
	}

	topicBasePath := fmt.Sprintf("/topics/%s/%s", hybridScanner.topic.Namespace, hybridScanner.topic.Name)

	// Collect stats from each partition
	for _, relPartition := range relativePartitions {
		partitionPath := fmt.Sprintf("%s/%s", topicBasePath, relPartition)

		// Read parquet file statistics
		parquetStats, err := hybridScanner.ReadParquetStatistics(partitionPath)
		if err == nil && len(parquetStats) > 0 {
			dataSources.ParquetFiles[partitionPath] = parquetStats
			for _, stat := range parquetStats {
				dataSources.ParquetRowCount += stat.RowCount
			}
		}

		// Count live log files
		liveLogCount, _ := opt.engine.countLiveLogFiles(partitionPath, dataSources.ParquetFiles[partitionPath])
		dataSources.LiveLogRowCount += liveLogCount
	}

	dataSources.PartitionsCount = len(relativePartitions)

	return dataSources, nil
}
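
// Illustrative result (hypothetical values and partition path): for a topic whose single
// partition holds one parquet file of 1000 rows plus 50 uncompacted live log rows,
// CollectDataSources would be expected to return roughly
//
//	&TopicDataSources{
//		ParquetFiles:    map[string][]*ParquetFileStats{"/topics/ns/t/<partition>": {...}},
//		ParquetRowCount: 1000,
//		LiveLogRowCount: 50,
//		PartitionsCount: 1,
//	}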

// AggregationComputer handles the computation of aggregations using fast path
type AggregationComputer struct {
	engine *SQLEngine
}

// NewAggregationComputer creates a new aggregation computer
func NewAggregationComputer(engine *SQLEngine) *AggregationComputer {
	return &AggregationComputer{engine: engine}
}

// ComputeFastPathAggregations computes aggregations using parquet statistics and live log data
func (comp *AggregationComputer) ComputeFastPathAggregations(
	ctx context.Context,
	aggregations []AggregationSpec,
	dataSources *TopicDataSources,
	partitions []string,
) ([]AggregationResult, error) {

	aggResults := make([]AggregationResult, len(aggregations))

	for i, spec := range aggregations {
		switch spec.Function {
		case "COUNT":
			if spec.Column == "*" {
				aggResults[i].Count = dataSources.ParquetRowCount + dataSources.LiveLogRowCount
			} else {
				// For specific columns, we might need to account for NULLs in the future
				aggResults[i].Count = dataSources.ParquetRowCount + dataSources.LiveLogRowCount
			}

		case "MIN":
			globalMin, err := comp.computeGlobalMin(spec, dataSources, partitions)
			if err != nil {
				return nil, AggregationError{
					Operation: spec.Function,
					Column:    spec.Column,
					Cause:     err,
				}
			}
			aggResults[i].Min = globalMin

		case "MAX":
			globalMax, err := comp.computeGlobalMax(spec, dataSources, partitions)
			if err != nil {
				return nil, AggregationError{
					Operation: spec.Function,
					Column:    spec.Column,
					Cause:     err,
				}
			}
			aggResults[i].Max = globalMax

		default:
			return nil, OptimizationError{
				Strategy: "fast_path_aggregation",
				Reason:   fmt.Sprintf("unsupported aggregation function: %s", spec.Function),
			}
		}
	}

	return aggResults, nil
}
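
// Worked example (hypothetical numbers): with ParquetRowCount=1000 and LiveLogRowCount=50,
// COUNT(*) on the fast path is simply 1000 + 50 = 1050 and no rows are scanned; MIN and MAX
// instead merge parquet column statistics with live log values via computeGlobalMin/Max below.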

// computeGlobalMin computes the global minimum value across all data sources
func (comp *AggregationComputer) computeGlobalMin(spec AggregationSpec, dataSources *TopicDataSources, partitions []string) (interface{}, error) {
	var globalMin interface{}
	var globalMinValue *schema_pb.Value
	hasParquetStats := false

	// Step 1: Get minimum from parquet statistics
	for _, fileStats := range dataSources.ParquetFiles {
		for _, fileStat := range fileStats {
			// Try case-insensitive column lookup
			var colStats *ParquetColumnStats
			var found bool

			// First try exact match
			if stats, exists := fileStat.ColumnStats[spec.Column]; exists {
				colStats = stats
				found = true
			} else {
				// Try case-insensitive lookup
				for colName, stats := range fileStat.ColumnStats {
					if strings.EqualFold(colName, spec.Column) {
						colStats = stats
						found = true
						break
					}
				}
			}

			if found && colStats != nil && colStats.MinValue != nil {
				if globalMinValue == nil || comp.engine.compareValues(colStats.MinValue, globalMinValue) < 0 {
					globalMinValue = colStats.MinValue
					extractedValue := comp.engine.extractRawValue(colStats.MinValue)
					if extractedValue != nil {
						globalMin = extractedValue
						hasParquetStats = true
					}
				}
			}
		}
	}

	// Step 2: Get minimum from live log data (only if live log rows exist)
	if dataSources.LiveLogRowCount > 0 {
		for _, partition := range partitions {
			partitionParquetSources := make(map[string]bool)
			if partitionFileStats, exists := dataSources.ParquetFiles[partition]; exists {
				partitionParquetSources = comp.engine.extractParquetSourceFiles(partitionFileStats)
			}

			liveLogMin, _, err := comp.engine.computeLiveLogMinMax(partition, spec.Column, partitionParquetSources)
			if err != nil {
				continue // Skip partitions with errors
			}

			if liveLogMin != nil {
				if globalMin == nil {
					globalMin = liveLogMin
				} else {
					liveLogSchemaValue := comp.engine.convertRawValueToSchemaValue(liveLogMin)
					if liveLogSchemaValue != nil && comp.engine.compareValues(liveLogSchemaValue, globalMinValue) < 0 {
						globalMin = liveLogMin
						globalMinValue = liveLogSchemaValue
					}
				}
			}
		}
	}

	// Step 3: Handle system columns if no regular data found
	if globalMin == nil && !hasParquetStats {
		globalMin = comp.engine.getSystemColumnGlobalMin(spec.Column, dataSources.ParquetFiles)
	}

	return globalMin, nil
}
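
// Worked example (hypothetical values): if the parquet stats for column "amount" report a
// minimum of 7 while an uncompacted live log row holds amount=3, Step 1 sets globalMin to 7
// and Step 2 then lowers it to 3, because compareValues reports the live log value as smaller.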

// computeGlobalMax computes the global maximum value across all data sources
func (comp *AggregationComputer) computeGlobalMax(spec AggregationSpec, dataSources *TopicDataSources, partitions []string) (interface{}, error) {
	var globalMax interface{}
	var globalMaxValue *schema_pb.Value
	hasParquetStats := false

	// Step 1: Get maximum from parquet statistics
	for _, fileStats := range dataSources.ParquetFiles {
		for _, fileStat := range fileStats {
			// Try case-insensitive column lookup
			var colStats *ParquetColumnStats
			var found bool

			// First try exact match
			if stats, exists := fileStat.ColumnStats[spec.Column]; exists {
				colStats = stats
				found = true
			} else {
				// Try case-insensitive lookup
				for colName, stats := range fileStat.ColumnStats {
					if strings.EqualFold(colName, spec.Column) {
						colStats = stats
						found = true
						break
					}
				}
			}

			if found && colStats != nil && colStats.MaxValue != nil {
				if globalMaxValue == nil || comp.engine.compareValues(colStats.MaxValue, globalMaxValue) > 0 {
					globalMaxValue = colStats.MaxValue
					extractedValue := comp.engine.extractRawValue(colStats.MaxValue)
					if extractedValue != nil {
						globalMax = extractedValue
						hasParquetStats = true
					}
				}
			}
		}
	}

	// Step 2: Get maximum from live log data (only if live logs exist)
	if dataSources.LiveLogRowCount > 0 {
		for _, partition := range partitions {
			partitionParquetSources := make(map[string]bool)
			if partitionFileStats, exists := dataSources.ParquetFiles[partition]; exists {
				partitionParquetSources = comp.engine.extractParquetSourceFiles(partitionFileStats)
			}

			_, liveLogMax, err := comp.engine.computeLiveLogMinMax(partition, spec.Column, partitionParquetSources)
			if err != nil {
				continue // Skip partitions with errors
			}

			if liveLogMax != nil {
				if globalMax == nil {
					globalMax = liveLogMax
				} else {
					liveLogSchemaValue := comp.engine.convertRawValueToSchemaValue(liveLogMax)
					if liveLogSchemaValue != nil && comp.engine.compareValues(liveLogSchemaValue, globalMaxValue) > 0 {
						globalMax = liveLogMax
						globalMaxValue = liveLogSchemaValue
					}
				}
			}
		}
	}

	// Step 3: Handle system columns if no regular data found
	if globalMax == nil && !hasParquetStats {
		globalMax = comp.engine.getSystemColumnGlobalMax(spec.Column, dataSources.ParquetFiles)
	}

	return globalMax, nil
}

// executeAggregationQuery handles SELECT queries with aggregation functions
func (e *SQLEngine) executeAggregationQuery(ctx context.Context, hybridScanner *HybridMessageScanner, aggregations []AggregationSpec, stmt *sqlparser.Select) (*QueryResult, error) {
	// Parse WHERE clause for filtering
	var predicate func(*schema_pb.RecordValue) bool
	var err error
	if stmt.Where != nil {
		predicate, err = e.buildPredicate(stmt.Where.Expr)
		if err != nil {
			return &QueryResult{Error: err}, err
		}
	}

	// Extract time filters for optimization
	startTimeNs, stopTimeNs := int64(0), int64(0)
	if stmt.Where != nil {
		startTimeNs, stopTimeNs = e.extractTimeFilters(stmt.Where.Expr)
	}

	// FAST PATH: Try to use parquet statistics for optimization
	// This can be ~130x faster than scanning all data
	if stmt.Where == nil { // Only optimize when no complex WHERE clause
		fastResult, canOptimize := e.tryFastParquetAggregation(ctx, hybridScanner, aggregations)
		if canOptimize {
			fmt.Printf("Using fast hybrid statistics for aggregation (parquet stats + live log counts)\n")
			return fastResult, nil
		}
	}

	// SLOW PATH: Fall back to full table scan
	fmt.Printf("Using full table scan for aggregation (parquet optimization not applicable)\n")

	// Build scan options for full table scan (aggregations need all data)
	hybridScanOptions := HybridScanOptions{
		StartTimeNs: startTimeNs,
		StopTimeNs:  stopTimeNs,
		Limit:       0, // No limit for aggregations - need all data
		Predicate:   predicate,
	}

	// Execute the hybrid scan to get all matching records
	results, err := hybridScanner.Scan(ctx, hybridScanOptions)
	if err != nil {
		return &QueryResult{Error: err}, err
	}

	// Compute aggregations
	aggResults := e.computeAggregations(results, aggregations)

	// Build result set
	columns := make([]string, len(aggregations))
	row := make([]sqltypes.Value, len(aggregations))

	for i, spec := range aggregations {
		columns[i] = spec.Alias
		row[i] = e.formatAggregationResult(spec, aggResults[i])
	}

	return &QueryResult{
		Columns: columns,
		Rows:    [][]sqltypes.Value{row},
	}, nil
}
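
// Path selection examples (assumptions based on the checks above): a query like
// "SELECT COUNT(*) FROM t" or "SELECT MAX(id) FROM t" has no WHERE clause and uses only
// fast-path-capable functions, so it can take tryFastParquetAggregation; "SELECT SUM(x) FROM t"
// or any query with a WHERE clause falls through to the full hybrid scan.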

// tryFastParquetAggregation attempts to compute aggregations using a hybrid approach:
// - Use parquet metadata for parquet files
// - Count live log files for live data
// - Combine both for accurate results per partition
// Returns (result, canOptimize) where canOptimize=true means the hybrid fast path was used
func (e *SQLEngine) tryFastParquetAggregation(ctx context.Context, hybridScanner *HybridMessageScanner, aggregations []AggregationSpec) (*QueryResult, bool) {
	// Use the new modular components
	optimizer := NewFastPathOptimizer(e)
	computer := NewAggregationComputer(e)

	// Step 1: Determine strategy
	strategy := optimizer.DetermineStrategy(aggregations)
	if !strategy.CanUseFastPath {
		return nil, false
	}

	// Step 2: Collect data sources
	dataSources, err := optimizer.CollectDataSources(ctx, hybridScanner)
	if err != nil {
		return nil, false
	}

	// Build partition list for aggregation computer
	relativePartitions, err := e.discoverTopicPartitions(hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	if err != nil {
		return nil, false
	}

	topicBasePath := fmt.Sprintf("/topics/%s/%s", hybridScanner.topic.Namespace, hybridScanner.topic.Name)
	partitions := make([]string, len(relativePartitions))
	for i, relPartition := range relativePartitions {
		partitions[i] = fmt.Sprintf("%s/%s", topicBasePath, relPartition)
	}

	// Debug: Show the hybrid optimization results
	if dataSources.ParquetRowCount > 0 || dataSources.LiveLogRowCount > 0 {
		partitionsWithLiveLogs := 0
		if dataSources.LiveLogRowCount > 0 {
			partitionsWithLiveLogs = 1 // Simplified for now
		}
		fmt.Printf("Hybrid fast aggregation with deduplication: %d parquet rows + %d deduplicated live log rows from %d partitions\n",
			dataSources.ParquetRowCount, dataSources.LiveLogRowCount, partitionsWithLiveLogs)
	}

	// Step 3: Compute aggregations using fast path
	aggResults, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions)
	if err != nil {
		return nil, false
	}

	// Step 4: Build final query result
	columns := make([]string, len(aggregations))
	row := make([]sqltypes.Value, len(aggregations))

	for i, spec := range aggregations {
		columns[i] = spec.Alias
		row[i] = e.formatAggregationResult(spec, aggResults[i])
	}

	result := &QueryResult{
		Columns: columns,
		Rows:    [][]sqltypes.Value{row},
	}

	return result, true
}

// computeAggregations computes aggregation results from a full table scan
func (e *SQLEngine) computeAggregations(results []HybridScanResult, aggregations []AggregationSpec) []AggregationResult {
	aggResults := make([]AggregationResult, len(aggregations))

	for i, spec := range aggregations {
		switch spec.Function {
		case "COUNT":
			if spec.Column == "*" {
				aggResults[i].Count = int64(len(results))
			} else {
				count := int64(0)
				for _, result := range results {
					if value := e.findColumnValue(result, spec.Column); value != nil && !e.isNullValue(value) {
						count++
					}
				}
				aggResults[i].Count = count
			}

		case "SUM":
			sum := float64(0)
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if numValue := e.convertToNumber(value); numValue != nil {
						sum += *numValue
					}
				}
			}
			aggResults[i].Sum = sum

		case "AVG":
			sum := float64(0)
			count := int64(0)
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if numValue := e.convertToNumber(value); numValue != nil {
						sum += *numValue
						count++
					}
				}
			}
			if count > 0 {
				aggResults[i].Sum = sum / float64(count) // Store average in Sum field
				aggResults[i].Count = count
			}

		case "MIN":
			var min interface{}
			var minValue *schema_pb.Value
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if minValue == nil || e.compareValues(value, minValue) < 0 {
						minValue = value
						min = e.extractRawValue(value)
					}
				}
			}
			aggResults[i].Min = min

		case "MAX":
			var max interface{}
			var maxValue *schema_pb.Value
			for _, result := range results {
				if value := e.findColumnValue(result, spec.Column); value != nil {
					if maxValue == nil || e.compareValues(value, maxValue) > 0 {
						maxValue = value
						max = e.extractRawValue(value)
					}
				}
			}
			aggResults[i].Max = max
		}
	}

	return aggResults
}

// canUseParquetStatsForAggregation determines if an aggregation can be optimized with parquet stats
func (e *SQLEngine) canUseParquetStatsForAggregation(spec AggregationSpec) bool {
	switch spec.Function {
	case "COUNT":
		return spec.Column == "*" || e.isSystemColumn(spec.Column) || e.isRegularColumn(spec.Column)
	case "MIN", "MAX":
		return e.isSystemColumn(spec.Column) || e.isRegularColumn(spec.Column)
	case "SUM", "AVG":
		// These require scanning actual values, not just min/max
		return false
	default:
		return false
	}
}
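
// Rationale, summarized from the cases above: COUNT needs only row counts, and MIN/MAX need
// only per-column min/max statistics, all of which parquet metadata already provides; SUM and
// AVG need every individual value, so they always require a scan.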