// Mirror of https://github.com/seaweedfs/seaweedfs.git
// (synced 2025-09-20 04:07:57 +08:00)
package engine

import (
	"context"
	"errors"
	"testing"

	"github.com/seaweedfs/seaweedfs/weed/mq/topic"
	"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
	"github.com/xwb1989/sqlparser"
)

// Mock implementations for testing
|
|
type MockHybridMessageScanner struct {
|
|
mock.Mock
|
|
topic topic.Topic
|
|
}
|
|
|
|
func (m *MockHybridMessageScanner) ReadParquetStatistics(partitionPath string) ([]*ParquetFileStats, error) {
|
|
args := m.Called(partitionPath)
|
|
return args.Get(0).([]*ParquetFileStats), args.Error(1)
|
|
}
|
|
|
|
type MockSQLEngine struct {
|
|
*SQLEngine
|
|
mockPartitions map[string][]string
|
|
mockParquetSourceFiles map[string]map[string]bool
|
|
mockLiveLogRowCounts map[string]int64
|
|
mockColumnStats map[string]map[string]*ParquetColumnStats
|
|
}
|
|
|
|
func NewMockSQLEngine() *MockSQLEngine {
|
|
return &MockSQLEngine{
|
|
SQLEngine: &SQLEngine{
|
|
catalog: &SchemaCatalog{
|
|
databases: make(map[string]*DatabaseInfo),
|
|
currentDatabase: "test",
|
|
},
|
|
},
|
|
mockPartitions: make(map[string][]string),
|
|
mockParquetSourceFiles: make(map[string]map[string]bool),
|
|
mockLiveLogRowCounts: make(map[string]int64),
|
|
mockColumnStats: make(map[string]map[string]*ParquetColumnStats),
|
|
}
|
|
}
|
|
|
|
func (m *MockSQLEngine) discoverTopicPartitions(namespace, topicName string) ([]string, error) {
|
|
key := namespace + "." + topicName
|
|
if partitions, exists := m.mockPartitions[key]; exists {
|
|
return partitions, nil
|
|
}
|
|
return []string{"partition-1", "partition-2"}, nil
|
|
}
|
|
|
|
func (m *MockSQLEngine) extractParquetSourceFiles(fileStats []*ParquetFileStats) map[string]bool {
|
|
if len(fileStats) == 0 {
|
|
return make(map[string]bool)
|
|
}
|
|
return map[string]bool{"converted-log-1": true}
|
|
}
|
|
|
|
func (m *MockSQLEngine) countLiveLogRowsExcludingParquetSources(partition string, parquetSources map[string]bool) (int64, error) {
|
|
if count, exists := m.mockLiveLogRowCounts[partition]; exists {
|
|
return count, nil
|
|
}
|
|
return 25, nil
|
|
}
|
|
|
|
func (m *MockSQLEngine) computeLiveLogMinMax(partition, column string, parquetSources map[string]bool) (interface{}, interface{}, error) {
|
|
switch column {
|
|
case "id":
|
|
return int64(1), int64(50), nil
|
|
case "value":
|
|
return 10.5, 99.9, nil
|
|
default:
|
|
return nil, nil, nil
|
|
}
|
|
}
|
|
|
|
func (m *MockSQLEngine) getSystemColumnGlobalMin(column string, allFileStats map[string][]*ParquetFileStats) interface{} {
|
|
return int64(1000000000)
|
|
}
|
|
|
|
func (m *MockSQLEngine) getSystemColumnGlobalMax(column string, allFileStats map[string][]*ParquetFileStats) interface{} {
|
|
return int64(2000000000)
|
|
}
|
|
|
|
func createMockColumnStats(column string, minVal, maxVal interface{}) *ParquetColumnStats {
|
|
return &ParquetColumnStats{
|
|
ColumnName: column,
|
|
MinValue: convertToSchemaValue(minVal),
|
|
MaxValue: convertToSchemaValue(maxVal),
|
|
NullCount: 0,
|
|
}
|
|
}
|
|
|
|
func convertToSchemaValue(val interface{}) *schema_pb.Value {
|
|
switch v := val.(type) {
|
|
case int64:
|
|
return &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: v}}
|
|
case float64:
|
|
return &schema_pb.Value{Kind: &schema_pb.Value_DoubleValue{DoubleValue: v}}
|
|
case string:
|
|
return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: v}}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Test FastPathOptimizer
|
|
func TestFastPathOptimizer_DetermineStrategy(t *testing.T) {
|
|
engine := NewMockSQLEngine()
|
|
optimizer := NewFastPathOptimizer(engine.SQLEngine)
|
|
|
|
tests := []struct {
|
|
name string
|
|
aggregations []AggregationSpec
|
|
expected AggregationStrategy
|
|
}{
|
|
{
|
|
name: "Supported aggregations",
|
|
aggregations: []AggregationSpec{
|
|
{Function: "COUNT", Column: "*"},
|
|
{Function: "MAX", Column: "id"},
|
|
{Function: "MIN", Column: "value"},
|
|
},
|
|
expected: AggregationStrategy{
|
|
CanUseFastPath: true,
|
|
Reason: "all_aggregations_supported",
|
|
UnsupportedSpecs: []AggregationSpec{},
|
|
},
|
|
},
|
|
{
|
|
name: "Unsupported aggregation",
|
|
aggregations: []AggregationSpec{
|
|
{Function: "COUNT", Column: "*"},
|
|
{Function: "AVG", Column: "value"}, // Not supported
|
|
},
|
|
expected: AggregationStrategy{
|
|
CanUseFastPath: false,
|
|
Reason: "unsupported_aggregation_functions",
|
|
},
|
|
},
|
|
{
|
|
name: "Empty aggregations",
|
|
aggregations: []AggregationSpec{},
|
|
expected: AggregationStrategy{
|
|
CanUseFastPath: true,
|
|
Reason: "all_aggregations_supported",
|
|
UnsupportedSpecs: []AggregationSpec{},
|
|
},
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
strategy := optimizer.DetermineStrategy(tt.aggregations)
|
|
|
|
assert.Equal(t, tt.expected.CanUseFastPath, strategy.CanUseFastPath)
|
|
assert.Equal(t, tt.expected.Reason, strategy.Reason)
|
|
if !tt.expected.CanUseFastPath {
|
|
assert.NotEmpty(t, strategy.UnsupportedSpecs)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// Test AggregationComputer
|
|
func TestAggregationComputer_ComputeFastPathAggregations(t *testing.T) {
|
|
engine := NewMockSQLEngine()
|
|
computer := NewAggregationComputer(engine.SQLEngine)
|
|
|
|
dataSources := &TopicDataSources{
|
|
ParquetFiles: map[string][]*ParquetFileStats{
|
|
"/topics/test/topic1/partition-1": {
|
|
{
|
|
RowCount: 30,
|
|
ColumnStats: map[string]*ParquetColumnStats{
|
|
"id": createMockColumnStats("id", int64(10), int64(40)),
|
|
},
|
|
},
|
|
},
|
|
},
|
|
ParquetRowCount: 30,
|
|
LiveLogRowCount: 25,
|
|
PartitionsCount: 1,
|
|
}
|
|
|
|
partitions := []string{"/topics/test/topic1/partition-1"}
|
|
|
|
tests := []struct {
|
|
name string
|
|
aggregations []AggregationSpec
|
|
validate func(t *testing.T, results []AggregationResult)
|
|
}{
|
|
{
|
|
name: "COUNT aggregation",
|
|
aggregations: []AggregationSpec{
|
|
{Function: "COUNT", Column: "*"},
|
|
},
|
|
validate: func(t *testing.T, results []AggregationResult) {
|
|
assert.Len(t, results, 1)
|
|
assert.Equal(t, int64(55), results[0].Count) // 30 + 25
|
|
},
|
|
},
|
|
{
|
|
name: "MAX aggregation",
|
|
aggregations: []AggregationSpec{
|
|
{Function: "MAX", Column: "id"},
|
|
},
|
|
validate: func(t *testing.T, results []AggregationResult) {
|
|
assert.Len(t, results, 1)
|
|
// Should be max of parquet stats (40) - mock doesn't combine with live log
|
|
assert.Equal(t, int64(40), results[0].Max)
|
|
},
|
|
},
|
|
{
|
|
name: "MIN aggregation",
|
|
aggregations: []AggregationSpec{
|
|
{Function: "MIN", Column: "id"},
|
|
},
|
|
validate: func(t *testing.T, results []AggregationResult) {
|
|
assert.Len(t, results, 1)
|
|
// Should be min of parquet stats (10) - mock doesn't combine with live log
|
|
assert.Equal(t, int64(10), results[0].Min)
|
|
},
|
|
},
|
|
}
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
ctx := context.Background()
|
|
results, err := computer.ComputeFastPathAggregations(ctx, tt.aggregations, dataSources, partitions)
|
|
|
|
assert.NoError(t, err)
|
|
tt.validate(t, results)
|
|
})
|
|
}
|
|
}
|
|
|
|
// Test ExecutionPlanBuilder
|
|
func TestExecutionPlanBuilder_BuildAggregationPlan(t *testing.T) {
|
|
engine := NewMockSQLEngine()
|
|
builder := NewExecutionPlanBuilder(engine.SQLEngine)
|
|
|
|
// Parse a simple SELECT statement
|
|
stmt, err := sqlparser.Parse("SELECT COUNT(*) FROM test_topic")
|
|
assert.NoError(t, err)
|
|
selectStmt := stmt.(*sqlparser.Select)
|
|
|
|
aggregations := []AggregationSpec{
|
|
{Function: "COUNT", Column: "*"},
|
|
}
|
|
|
|
strategy := AggregationStrategy{
|
|
CanUseFastPath: true,
|
|
Reason: "all_aggregations_supported",
|
|
}
|
|
|
|
dataSources := &TopicDataSources{
|
|
ParquetRowCount: 100,
|
|
LiveLogRowCount: 50,
|
|
PartitionsCount: 3,
|
|
ParquetFiles: map[string][]*ParquetFileStats{
|
|
"partition-1": {{RowCount: 50}},
|
|
"partition-2": {{RowCount: 50}},
|
|
},
|
|
}
|
|
|
|
plan := builder.BuildAggregationPlan(selectStmt, aggregations, strategy, dataSources)
|
|
|
|
assert.Equal(t, "SELECT", plan.QueryType)
|
|
assert.Equal(t, "hybrid_fast_path", plan.ExecutionStrategy)
|
|
assert.Contains(t, plan.DataSources, "parquet_stats")
|
|
assert.Contains(t, plan.DataSources, "live_logs")
|
|
assert.Equal(t, 3, plan.PartitionsScanned)
|
|
assert.Equal(t, 2, plan.ParquetFilesScanned)
|
|
assert.Contains(t, plan.OptimizationsUsed, "parquet_statistics")
|
|
assert.Equal(t, []string{"COUNT(*)"}, plan.Aggregations)
|
|
assert.Equal(t, int64(50), plan.TotalRowsProcessed) // Only live logs scanned
|
|
}
|
|
|
|
// Test Error Types
|
|
func TestErrorTypes(t *testing.T) {
|
|
t.Run("AggregationError", func(t *testing.T) {
|
|
err := AggregationError{
|
|
Operation: "MAX",
|
|
Column: "id",
|
|
Cause: errors.New("column not found"),
|
|
}
|
|
|
|
expected := "aggregation error in MAX(id): column not found"
|
|
assert.Equal(t, expected, err.Error())
|
|
})
|
|
|
|
t.Run("DataSourceError", func(t *testing.T) {
|
|
err := DataSourceError{
|
|
Source: "partition_discovery:test.topic1",
|
|
Cause: errors.New("network timeout"),
|
|
}
|
|
|
|
expected := "data source error in partition_discovery:test.topic1: network timeout"
|
|
assert.Equal(t, expected, err.Error())
|
|
})
|
|
|
|
t.Run("OptimizationError", func(t *testing.T) {
|
|
err := OptimizationError{
|
|
Strategy: "fast_path_aggregation",
|
|
Reason: "unsupported function: AVG",
|
|
}
|
|
|
|
expected := "optimization failed for fast_path_aggregation: unsupported function: AVG"
|
|
assert.Equal(t, expected, err.Error())
|
|
})
|
|
}
|
|
|
|
// Integration Tests
|
|
func TestIntegration_FastPathOptimization(t *testing.T) {
|
|
engine := NewMockSQLEngine()
|
|
|
|
// Setup components
|
|
optimizer := NewFastPathOptimizer(engine.SQLEngine)
|
|
computer := NewAggregationComputer(engine.SQLEngine)
|
|
|
|
// Mock data setup
|
|
aggregations := []AggregationSpec{
|
|
{Function: "COUNT", Column: "*"},
|
|
{Function: "MAX", Column: "id"},
|
|
}
|
|
|
|
// Step 1: Determine strategy
|
|
strategy := optimizer.DetermineStrategy(aggregations)
|
|
assert.True(t, strategy.CanUseFastPath)
|
|
|
|
// Step 2: Mock data sources
|
|
dataSources := &TopicDataSources{
|
|
ParquetFiles: map[string][]*ParquetFileStats{
|
|
"/topics/test/topic1/partition-1": {{
|
|
RowCount: 75,
|
|
ColumnStats: map[string]*ParquetColumnStats{
|
|
"id": createMockColumnStats("id", int64(1), int64(100)),
|
|
},
|
|
}},
|
|
},
|
|
ParquetRowCount: 75,
|
|
LiveLogRowCount: 25,
|
|
PartitionsCount: 1,
|
|
}
|
|
|
|
partitions := []string{"/topics/test/topic1/partition-1"}
|
|
|
|
// Step 3: Compute aggregations
|
|
ctx := context.Background()
|
|
results, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions)
|
|
assert.NoError(t, err)
|
|
assert.Len(t, results, 2)
|
|
assert.Equal(t, int64(100), results[0].Count) // 75 + 25
|
|
assert.Equal(t, int64(100), results[1].Max) // From parquet stats mock
|
|
}
|
|
|
|
func TestIntegration_FallbackToFullScan(t *testing.T) {
|
|
engine := NewMockSQLEngine()
|
|
optimizer := NewFastPathOptimizer(engine.SQLEngine)
|
|
|
|
// Unsupported aggregations
|
|
aggregations := []AggregationSpec{
|
|
{Function: "AVG", Column: "value"}, // Not supported
|
|
}
|
|
|
|
// Step 1: Strategy should reject fast path
|
|
strategy := optimizer.DetermineStrategy(aggregations)
|
|
assert.False(t, strategy.CanUseFastPath)
|
|
assert.Equal(t, "unsupported_aggregation_functions", strategy.Reason)
|
|
assert.NotEmpty(t, strategy.UnsupportedSpecs)
|
|
}
|
|
|
|
// Benchmark Tests
|
|
func BenchmarkFastPathOptimizer_DetermineStrategy(b *testing.B) {
|
|
engine := NewMockSQLEngine()
|
|
optimizer := NewFastPathOptimizer(engine.SQLEngine)
|
|
|
|
aggregations := []AggregationSpec{
|
|
{Function: "COUNT", Column: "*"},
|
|
{Function: "MAX", Column: "id"},
|
|
{Function: "MIN", Column: "value"},
|
|
}
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
strategy := optimizer.DetermineStrategy(aggregations)
|
|
_ = strategy.CanUseFastPath
|
|
}
|
|
}
|
|
|
|
func BenchmarkAggregationComputer_ComputeFastPathAggregations(b *testing.B) {
|
|
engine := NewMockSQLEngine()
|
|
computer := NewAggregationComputer(engine.SQLEngine)
|
|
|
|
dataSources := &TopicDataSources{
|
|
ParquetFiles: map[string][]*ParquetFileStats{
|
|
"partition-1": {{
|
|
RowCount: 1000,
|
|
ColumnStats: map[string]*ParquetColumnStats{
|
|
"id": createMockColumnStats("id", int64(1), int64(1000)),
|
|
},
|
|
}},
|
|
},
|
|
ParquetRowCount: 1000,
|
|
LiveLogRowCount: 100,
|
|
}
|
|
|
|
aggregations := []AggregationSpec{
|
|
{Function: "COUNT", Column: "*"},
|
|
{Function: "MAX", Column: "id"},
|
|
}
|
|
|
|
partitions := []string{"partition-1"}
|
|
ctx := context.Background()
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
results, err := computer.ComputeFastPathAggregations(ctx, aggregations, dataSources, partitions)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
_ = results
|
|
}
|
|
}
|