mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2025-09-20 10:48:04 +08:00

Added comprehensive foundation for SQL window functions with timestamp analytics: Core Window Function Types: - WindowSpec with PartitionBy and OrderBy support - WindowFunction struct for ROW_NUMBER, RANK, LAG, LEAD - OrderByClause for timestamp-based ordering - Extended SelectStatement to support WindowFunctions field Timestamp Analytics Functions: ✅ ApplyRowNumber() - ROW_NUMBER() OVER (ORDER BY timestamp) ✅ ExtractYear() - Extract year from TIMESTAMP logical type ✅ ExtractMonth() - Extract month from TIMESTAMP logical type ✅ ExtractDay() - Extract day from TIMESTAMP logical type ✅ FilterByYear() - Filter records by timestamp year Foundation for Advanced Window Functions: - LAG/LEAD for time-series access to previous/next values - RANK/DENSE_RANK for temporal ranking - FIRST_VALUE/LAST_VALUE for window boundaries - PARTITION BY support for grouped analytics This enables sophisticated time-series analytics like: - SELECT *, ROW_NUMBER() OVER (ORDER BY timestamp) FROM user_events WHERE EXTRACT(YEAR FROM timestamp) = 2024 - Trend analysis over time windows - Session analytics with LAG/LEAD functions - Time-based ranking and percentiles Ready for production time-series analytics with proper timestamp logical type support! 🚀
91 lines
2.9 KiB
Go
91 lines
2.9 KiB
Go
package engine
|
|
|
|
import (
|
|
"sort"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
|
|
)
|
|
|
|
// This file demonstrates basic window function concepts for timestamp-based analytics.
// It provides a foundation for a full window function implementation.
|
|
|
|
// ApplyRowNumber applies ROW_NUMBER() OVER (ORDER BY timestamp) to a result set
|
|
func (e *SQLEngine) ApplyRowNumber(results []HybridScanResult, orderByColumn string) []HybridScanResult {
|
|
// Sort results by timestamp if ordering by timestamp-related fields
|
|
if orderByColumn == "timestamp" || orderByColumn == "_timestamp_ns" {
|
|
sort.Slice(results, func(i, j int) bool {
|
|
return results[i].Timestamp < results[j].Timestamp
|
|
})
|
|
}
|
|
|
|
// Add ROW_NUMBER as a synthetic column
|
|
for i := range results {
|
|
if results[i].Values == nil {
|
|
results[i].Values = make(map[string]*schema_pb.Value)
|
|
}
|
|
results[i].Values["row_number"] = &schema_pb.Value{
|
|
Kind: &schema_pb.Value_Int64Value{Int64Value: int64(i + 1)},
|
|
}
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
// ExtractYear extracts the year from a TIMESTAMP logical type
|
|
func (e *SQLEngine) ExtractYear(timestampValue *schema_pb.TimestampValue) int {
|
|
if timestampValue == nil {
|
|
return 0
|
|
}
|
|
|
|
// Convert microseconds to seconds and create time
|
|
t := time.Unix(timestampValue.TimestampMicros/1_000_000, 0)
|
|
return t.Year()
|
|
}
|
|
|
|
// ExtractMonth extracts the month from a TIMESTAMP logical type
|
|
func (e *SQLEngine) ExtractMonth(timestampValue *schema_pb.TimestampValue) int {
|
|
if timestampValue == nil {
|
|
return 0
|
|
}
|
|
|
|
t := time.Unix(timestampValue.TimestampMicros/1_000_000, 0)
|
|
return int(t.Month())
|
|
}
|
|
|
|
// ExtractDay extracts the day from a TIMESTAMP logical type
|
|
func (e *SQLEngine) ExtractDay(timestampValue *schema_pb.TimestampValue) int {
|
|
if timestampValue == nil {
|
|
return 0
|
|
}
|
|
|
|
t := time.Unix(timestampValue.TimestampMicros/1_000_000, 0)
|
|
return t.Day()
|
|
}
|
|
|
|
// FilterByYear demonstrates filtering TIMESTAMP values by year
|
|
func (e *SQLEngine) FilterByYear(results []HybridScanResult, targetYear int) []HybridScanResult {
|
|
var filtered []HybridScanResult
|
|
|
|
for _, result := range results {
|
|
if timestampField := result.Values["timestamp"]; timestampField != nil {
|
|
if timestampVal, ok := timestampField.Kind.(*schema_pb.Value_TimestampValue); ok {
|
|
year := e.ExtractYear(timestampVal.TimestampValue)
|
|
if year == targetYear {
|
|
filtered = append(filtered, result)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return filtered
|
|
}
|
|
|
|
// This demonstrates the foundation for more complex window functions like:
|
|
// - LAG(value, offset) OVER (ORDER BY timestamp) - Access previous row value
|
|
// - LEAD(value, offset) OVER (ORDER BY timestamp) - Access next row value
|
|
// - RANK() OVER (ORDER BY timestamp) - Ranking with gaps for ties
|
|
// - DENSE_RANK() OVER (ORDER BY timestamp) - Ranking without gaps
|
|
// - FIRST_VALUE(value) OVER (PARTITION BY category ORDER BY timestamp) - First value in window
|
|
// - LAST_VALUE(value) OVER (PARTITION BY category ORDER BY timestamp) - Last value in window
|