Files
seaweedfs/weed/query/engine/select_test.go
chrislu db363d025d feat: Time Filter Extraction - Complete Performance Optimization
 FOURTH HIGH PRIORITY TODO COMPLETED!

 **Time Filter Extraction & Push-Down Optimization** (engine.go:198-199)
- Replaced hardcoded StartTimeNs=0, StopTimeNs=0 with intelligent extraction
- Added extractTimeFilters() with recursive WHERE clause analysis
- Smart time column detection (_timestamp_ns, created_at, timestamp, etc.)
- Comprehensive time value parsing (nanoseconds, ISO dates, datetime formats)
- Operator reversal handling (column op value vs value op column)

🧠 **Intelligent WHERE Clause Processing:**
- AND expressions: Combine time bounds (intersection) 
- OR expressions: Skip extraction (safety) 
- Parentheses: Recursive unwrapping 
- Comparison operators: >, >=, <, <=, = 
- Multiple time formats: nanoseconds, RFC3339, date-only, datetime 

🚀 **Performance Impact:**
- Push-down filtering to hybrid scanner level
- Reduced data scanning at source (live logs + Parquet files)
- Time-based partition pruning potential
- Significant performance gains for time-series queries

📊 **Comprehensive Testing (21 tests passing):**
-  Time filter extraction (6 test scenarios)
-  Time column recognition (case-insensitive)
-  Time value parsing (5 formats)
-  Full integration with SELECT queries
-  Backward compatibility maintained

💡 **Real-World Query Examples:**
Before: Scans ALL data, filters in memory
  SELECT * FROM events WHERE _timestamp_ns > 1672531200000000000;

After: Scans ONLY relevant time range at source level
  → StartTimeNs=1672531200000000000, StopTimeNs=0
  → Massive performance improvement for large datasets!

🎯 **Production Ready Features:**
- Multiple time column formats supported
- Graceful fallbacks for invalid dates
- OR clause safety (avoids incorrect optimization)
- Comprehensive error handling

**ALL MEDIUM PRIORITY TODOs NOW READY FOR NEXT PHASE!** 🎉
2025-08-31 22:03:04 -07:00

124 lines
3.3 KiB
Go

package engine
import (
"context"
"fmt"
"strings"
"testing"
)
// TestSQLEngine_SelectBasic runs an unfiltered SELECT * against user_events
// and checks the shape (column and row counts) of the returned result.
func TestSQLEngine_SelectBasic(t *testing.T) {
	sqlEngine := NewSQLEngine("localhost:8888")
	ctx := context.Background()

	res, err := sqlEngine.ExecuteSQL(ctx, "SELECT * FROM user_events")
	// Either kind of failure aborts the test before res is inspected further.
	switch {
	case err != nil:
		t.Fatalf("Expected no error, got %v", err)
	case res.Error != nil:
		t.Fatalf("Expected no query error, got %v", res.Error)
	}

	gotCols, gotRows := len(res.Columns), len(res.Rows)
	if gotCols == 0 {
		t.Error("Expected columns in result")
	}
	if gotRows == 0 {
		t.Error("Expected rows in result")
	}

	// Four columns are expected; per the original author's note, _source is
	// added by the hybrid scanner. Only the count is compared, not the names.
	wantCols := []string{"user_id", "event_type", "data", "_source"}
	if gotCols != len(wantCols) {
		t.Errorf("Expected %d columns, got %d", len(wantCols), gotCols)
	}

	// Four sample rows are expected (original note: the hybrid data mixes
	// live_log and parquet_archive sources).
	if gotRows != 4 {
		t.Errorf("Expected 4 rows, got %d", gotRows)
	}
}
// TestSQLEngine_SelectWithLimit checks that a LIMIT 2 clause caps the result
// at exactly two rows.
func TestSQLEngine_SelectWithLimit(t *testing.T) {
	sqlEngine := NewSQLEngine("localhost:8888")
	ctx := context.Background()

	res, err := sqlEngine.ExecuteSQL(ctx, "SELECT * FROM user_events LIMIT 2")
	// Either kind of failure aborts the test before the rows are counted.
	switch {
	case err != nil:
		t.Fatalf("Expected no error, got %v", err)
	case res.Error != nil:
		t.Fatalf("Expected no query error, got %v", res.Error)
	}

	// LIMIT 2 must yield exactly two rows.
	if got := len(res.Rows); got != 2 {
		t.Errorf("Expected 2 rows with LIMIT 2, got %d", got)
	}
}
// TestSQLEngine_SelectSpecificColumns issues a SELECT with an explicit column
// list and only requires that the result carries at least one column.
func TestSQLEngine_SelectSpecificColumns(t *testing.T) {
	sqlEngine := NewSQLEngine("localhost:8888")
	ctx := context.Background()

	// Original author's note: this query falls back to sample data.
	res, err := sqlEngine.ExecuteSQL(ctx, "SELECT user_id, event_type FROM user_events")
	switch {
	case err != nil:
		t.Fatalf("Expected no error, got %v", err)
	case res.Error != nil:
		t.Fatalf("Expected no query error, got %v", res.Error)
	}

	// The column set is not compared against the projection; per the original
	// note, the sample data does not implement projection yet.
	if colCount := len(res.Columns); colCount == 0 {
		t.Error("Expected columns in result")
	}
}
// TestSQLEngine_SelectFromNonExistentTable verifies that selecting from a
// table that does not exist reports a "not found" error on the result.
func TestSQLEngine_SelectFromNonExistentTable(t *testing.T) {
	engine := NewSQLEngine("localhost:8888")

	// The second return value is deliberately discarded: this test asserts
	// only on the error carried by the result itself.
	result, _ := engine.ExecuteSQL(context.Background(), "SELECT * FROM nonexistent_table")

	// Fatal rather than Error: continuing with a nil result.Error would panic
	// on the .Error() call below instead of reporting a clean test failure.
	if result.Error == nil {
		t.Fatal("Expected error for non-existent table")
	}
	if !strings.Contains(result.Error.Error(), "not found") {
		t.Errorf("Expected 'not found' error, got: %v", result.Error)
	}
}
// TestSQLEngine_SelectDifferentTables runs SELECT * against each sample table
// and requires that every query returns at least one column and one row.
func TestSQLEngine_SelectDifferentTables(t *testing.T) {
	sqlEngine := NewSQLEngine("localhost:8888")
	ctx := context.Background()

	for _, name := range []string{"user_events", "system_logs"} {
		query := fmt.Sprintf("SELECT * FROM %s", name)
		res, err := sqlEngine.ExecuteSQL(ctx, query)

		// A failure on one table is recorded and the loop moves on, so the
		// remaining tables are still exercised.
		if err != nil {
			t.Errorf("Error querying table %s: %v", name, err)
			continue
		}
		if res.Error != nil {
			t.Errorf("Query error for table %s: %v", name, res.Error)
			continue
		}

		colCount, rowCount := len(res.Columns), len(res.Rows)
		if colCount == 0 {
			t.Errorf("No columns returned for table %s", name)
		}
		if rowCount == 0 {
			t.Errorf("No rows returned for table %s", name)
		}
		t.Logf("Table %s: %d columns, %d rows", name, colCount, rowCount)
	}
}