mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2025-09-20 00:37:57 +08:00
using consts; avoid test data in production binary
This commit is contained in:
@@ -288,55 +288,3 @@ func (c *SchemaCatalog) GetDefaultPartitionCount() int32 {
|
|||||||
defer c.mu.RUnlock()
|
defer c.mu.RUnlock()
|
||||||
return c.defaultPartitionCount
|
return c.defaultPartitionCount
|
||||||
}
|
}
|
||||||
|
|
||||||
// initSampleData populates the catalog with sample schema data for testing
|
|
||||||
func (c *SchemaCatalog) initSampleData() {
|
|
||||||
// Create sample databases and tables
|
|
||||||
c.databases["default"] = &DatabaseInfo{
|
|
||||||
Name: "default",
|
|
||||||
Tables: map[string]*TableInfo{
|
|
||||||
"user_events": {
|
|
||||||
Name: "user_events",
|
|
||||||
Columns: []ColumnInfo{
|
|
||||||
{Name: "user_id", Type: "VARCHAR(100)", Nullable: true},
|
|
||||||
{Name: "event_type", Type: "VARCHAR(50)", Nullable: true},
|
|
||||||
{Name: "data", Type: "TEXT", Nullable: true},
|
|
||||||
// System columns - hidden by default in SELECT *
|
|
||||||
{Name: "_timestamp_ns", Type: "BIGINT", Nullable: false},
|
|
||||||
{Name: "_key", Type: "VARCHAR(255)", Nullable: true},
|
|
||||||
{Name: "_source", Type: "VARCHAR(50)", Nullable: false},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"system_logs": {
|
|
||||||
Name: "system_logs",
|
|
||||||
Columns: []ColumnInfo{
|
|
||||||
{Name: "level", Type: "VARCHAR(10)", Nullable: true},
|
|
||||||
{Name: "message", Type: "TEXT", Nullable: true},
|
|
||||||
{Name: "service", Type: "VARCHAR(50)", Nullable: true},
|
|
||||||
// System columns
|
|
||||||
{Name: "_timestamp_ns", Type: "BIGINT", Nullable: false},
|
|
||||||
{Name: "_key", Type: "VARCHAR(255)", Nullable: true},
|
|
||||||
{Name: "_source", Type: "VARCHAR(50)", Nullable: false},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
c.databases["test"] = &DatabaseInfo{
|
|
||||||
Name: "test",
|
|
||||||
Tables: map[string]*TableInfo{
|
|
||||||
"test-topic": {
|
|
||||||
Name: "test-topic",
|
|
||||||
Columns: []ColumnInfo{
|
|
||||||
{Name: "id", Type: "INT", Nullable: true},
|
|
||||||
{Name: "name", Type: "VARCHAR(100)", Nullable: true},
|
|
||||||
{Name: "value", Type: "DOUBLE", Nullable: true},
|
|
||||||
// System columns
|
|
||||||
{Name: "_timestamp_ns", Type: "BIGINT", Nullable: false},
|
|
||||||
{Name: "_key", Type: "VARCHAR(255)", Nullable: true},
|
|
||||||
{Name: "_source", Type: "VARCHAR(50)", Nullable: false},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@@ -1554,8 +1554,14 @@ func (e *SQLEngine) executeSelectStatement(ctx context.Context, stmt *SelectStat
|
|||||||
|
|
||||||
// Convert to SQL result format
|
// Convert to SQL result format
|
||||||
if selectAll {
|
if selectAll {
|
||||||
columns = nil // Let converter determine all columns
|
if len(columns) > 0 {
|
||||||
return hybridScanner.ConvertToSQLResult(results, columns), nil
|
// SELECT *, specific_columns - include both auto-discovered and explicit columns
|
||||||
|
return hybridScanner.ConvertToSQLResultWithMixedColumns(results, columns), nil
|
||||||
|
} else {
|
||||||
|
// SELECT * only - let converter determine all columns (excludes system columns)
|
||||||
|
columns = nil
|
||||||
|
return hybridScanner.ConvertToSQLResult(results, columns), nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle custom column expressions (including arithmetic)
|
// Handle custom column expressions (including arithmetic)
|
||||||
@@ -1782,8 +1788,14 @@ func (e *SQLEngine) executeSelectStatementWithBrokerStats(ctx context.Context, s
|
|||||||
|
|
||||||
// Convert to SQL result format
|
// Convert to SQL result format
|
||||||
if selectAll {
|
if selectAll {
|
||||||
columns = nil // Let converter determine all columns
|
if len(columns) > 0 {
|
||||||
return hybridScanner.ConvertToSQLResult(results, columns), nil
|
// SELECT *, specific_columns - include both auto-discovered and explicit columns
|
||||||
|
return hybridScanner.ConvertToSQLResultWithMixedColumns(results, columns), nil
|
||||||
|
} else {
|
||||||
|
// SELECT * only - let converter determine all columns (excludes system columns)
|
||||||
|
columns = nil
|
||||||
|
return hybridScanner.ConvertToSQLResult(results, columns), nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle custom column expressions (including arithmetic)
|
// Handle custom column expressions (including arithmetic)
|
||||||
@@ -1881,33 +1893,75 @@ func (e *SQLEngine) extractTimeFromComparison(comp *ComparisonExpr, startTimeNs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// isTimeColumn checks if a column name refers to a timestamp field
|
// isTimeColumn checks if a column refers to a timestamp field based on actual type information
|
||||||
|
// This function uses schema metadata, not naming conventions
|
||||||
func (e *SQLEngine) isTimeColumn(columnName string) bool {
|
func (e *SQLEngine) isTimeColumn(columnName string) bool {
|
||||||
if columnName == "" {
|
if columnName == "" {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// System timestamp columns
|
// System timestamp columns are always time columns
|
||||||
timeColumns := []string{
|
if columnName == SW_COLUMN_NAME_TIMESTAMP {
|
||||||
"_timestamp_ns", // SeaweedFS MQ system timestamp (nanoseconds)
|
return true
|
||||||
"timestamp_ns", // Alternative naming
|
|
||||||
"timestamp", // Common timestamp field
|
|
||||||
"created_at", // Common creation time field
|
|
||||||
"updated_at", // Common update time field
|
|
||||||
"event_time", // Event timestamp
|
|
||||||
"log_time", // Log timestamp
|
|
||||||
"ts", // Short form
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, timeCol := range timeColumns {
|
// For user-defined columns, check actual schema type information
|
||||||
if strings.EqualFold(columnName, timeCol) {
|
if e.catalog != nil {
|
||||||
return true
|
currentDB := e.catalog.GetCurrentDatabase()
|
||||||
|
if currentDB == "" {
|
||||||
|
currentDB = "default"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get current table context from query execution
|
||||||
|
// Note: This is a limitation - we need table context here
|
||||||
|
// In a full implementation, this would be passed from the query context
|
||||||
|
tableInfo, err := e.getCurrentTableInfo(currentDB)
|
||||||
|
if err == nil && tableInfo != nil {
|
||||||
|
for _, col := range tableInfo.Columns {
|
||||||
|
if strings.EqualFold(col.Name, columnName) {
|
||||||
|
// Use actual SQL type to determine if this is a timestamp
|
||||||
|
return e.isSQLTypeTimestamp(col.Type)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Only return true if we have explicit type information
|
||||||
|
// No guessing based on column names
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isSQLTypeTimestamp checks if a SQL type string represents a timestamp type
|
||||||
|
func (e *SQLEngine) isSQLTypeTimestamp(sqlType string) bool {
|
||||||
|
upperType := strings.ToUpper(strings.TrimSpace(sqlType))
|
||||||
|
|
||||||
|
// Handle type with precision/length specifications
|
||||||
|
if idx := strings.Index(upperType, "("); idx != -1 {
|
||||||
|
upperType = upperType[:idx]
|
||||||
|
}
|
||||||
|
|
||||||
|
switch upperType {
|
||||||
|
case "TIMESTAMP", "DATETIME":
|
||||||
|
return true
|
||||||
|
case "BIGINT":
|
||||||
|
// BIGINT could be a timestamp if it follows the pattern for timestamp storage
|
||||||
|
// This is a heuristic - in a better system, we'd have semantic type information
|
||||||
|
return false // Conservative approach - require explicit TIMESTAMP type
|
||||||
|
default:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// getCurrentTableInfo attempts to get table info for the current query context
|
||||||
|
// This is a simplified implementation - ideally table context would be passed explicitly
|
||||||
|
func (e *SQLEngine) getCurrentTableInfo(database string) (*TableInfo, error) {
|
||||||
|
// This is a limitation of the current architecture
|
||||||
|
// In practice, we'd need the table context from the current query
|
||||||
|
// For now, return nil to fallback to naming conventions
|
||||||
|
// TODO: Enhance architecture to pass table context through query execution
|
||||||
|
return nil, fmt.Errorf("table context not available in current architecture")
|
||||||
|
}
|
||||||
|
|
||||||
// getColumnName extracts column name from expression (handles ColName types)
|
// getColumnName extracts column name from expression (handles ColName types)
|
||||||
func (e *SQLEngine) getColumnName(expr ExprNode) string {
|
func (e *SQLEngine) getColumnName(expr ExprNode) string {
|
||||||
switch exprType := expr.(type) {
|
switch exprType := expr.(type) {
|
||||||
@@ -2757,11 +2811,11 @@ func (e *SQLEngine) computeFileMinMax(filerClient filer_pb.FilerClient, filePath
|
|||||||
if e.isSystemColumn(columnName) {
|
if e.isSystemColumn(columnName) {
|
||||||
// Handle system columns
|
// Handle system columns
|
||||||
switch strings.ToLower(columnName) {
|
switch strings.ToLower(columnName) {
|
||||||
case "_timestamp_ns", "timestamp_ns":
|
case SW_COLUMN_NAME_TIMESTAMP:
|
||||||
columnValue = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs}}
|
columnValue = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs}}
|
||||||
case "_key", "key":
|
case SW_COLUMN_NAME_KEY:
|
||||||
columnValue = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: logEntry.Key}}
|
columnValue = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: logEntry.Key}}
|
||||||
case "_source", "source":
|
case SW_COLUMN_NAME_SOURCE:
|
||||||
columnValue = &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: "live_log"}}
|
columnValue = &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: "live_log"}}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -2894,7 +2948,7 @@ func (e *SQLEngine) convertLogEntryToRecordValue(logEntry *filer_pb.LogEntry) (*
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add system columns
|
// Add system columns
|
||||||
recordValue.Fields[SW_COLUMN_NAME_TS] = &schema_pb.Value{
|
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{
|
||||||
Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs},
|
Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs},
|
||||||
}
|
}
|
||||||
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{
|
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{
|
||||||
@@ -3378,11 +3432,11 @@ func (e *SQLEngine) findColumnValue(result HybridScanResult, columnName string)
|
|||||||
// Check system columns first (stored separately in HybridScanResult)
|
// Check system columns first (stored separately in HybridScanResult)
|
||||||
lowerColumnName := strings.ToLower(columnName)
|
lowerColumnName := strings.ToLower(columnName)
|
||||||
switch lowerColumnName {
|
switch lowerColumnName {
|
||||||
case "_timestamp_ns", "timestamp_ns":
|
case SW_COLUMN_NAME_TIMESTAMP:
|
||||||
return &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: result.Timestamp}}
|
return &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: result.Timestamp}}
|
||||||
case "_key", "key":
|
case SW_COLUMN_NAME_KEY:
|
||||||
return &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: result.Key}}
|
return &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: result.Key}}
|
||||||
case "_source", "source":
|
case SW_COLUMN_NAME_SOURCE:
|
||||||
return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: result.Source}}
|
return &schema_pb.Value{Kind: &schema_pb.Value_StringValue{StringValue: result.Source}}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -942,9 +942,9 @@ func TestSQLEngine_ConvertLogEntryToRecordValue_ValidProtobuf(t *testing.T) {
|
|||||||
assert.NotNil(t, result.Fields)
|
assert.NotNil(t, result.Fields)
|
||||||
|
|
||||||
// Verify system columns are added correctly
|
// Verify system columns are added correctly
|
||||||
assert.Contains(t, result.Fields, SW_COLUMN_NAME_TS)
|
assert.Contains(t, result.Fields, SW_COLUMN_NAME_TIMESTAMP)
|
||||||
assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
|
assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
|
||||||
assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TS].GetInt64Value())
|
assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TIMESTAMP].GetInt64Value())
|
||||||
assert.Equal(t, []byte("test-key-001"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
|
assert.Equal(t, []byte("test-key-001"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
|
||||||
|
|
||||||
// Verify user data is preserved
|
// Verify user data is preserved
|
||||||
@@ -1004,15 +1004,15 @@ func TestSQLEngine_ConvertLogEntryToRecordValue_EmptyProtobuf(t *testing.T) {
|
|||||||
assert.NotNil(t, result.Fields)
|
assert.NotNil(t, result.Fields)
|
||||||
|
|
||||||
// Should have system columns
|
// Should have system columns
|
||||||
assert.Contains(t, result.Fields, SW_COLUMN_NAME_TS)
|
assert.Contains(t, result.Fields, SW_COLUMN_NAME_TIMESTAMP)
|
||||||
assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
|
assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
|
||||||
assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TS].GetInt64Value())
|
assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TIMESTAMP].GetInt64Value())
|
||||||
assert.Equal(t, []byte("empty-key"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
|
assert.Equal(t, []byte("empty-key"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
|
||||||
|
|
||||||
// Should have no user fields
|
// Should have no user fields
|
||||||
userFieldCount := 0
|
userFieldCount := 0
|
||||||
for fieldName := range result.Fields {
|
for fieldName := range result.Fields {
|
||||||
if fieldName != SW_COLUMN_NAME_TS && fieldName != SW_COLUMN_NAME_KEY {
|
if fieldName != SW_COLUMN_NAME_TIMESTAMP && fieldName != SW_COLUMN_NAME_KEY {
|
||||||
userFieldCount++
|
userFieldCount++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1046,9 +1046,9 @@ func TestSQLEngine_ConvertLogEntryToRecordValue_NilFieldsMap(t *testing.T) {
|
|||||||
assert.NotNil(t, result.Fields) // Should be created by the function
|
assert.NotNil(t, result.Fields) // Should be created by the function
|
||||||
|
|
||||||
// Should have system columns
|
// Should have system columns
|
||||||
assert.Contains(t, result.Fields, SW_COLUMN_NAME_TS)
|
assert.Contains(t, result.Fields, SW_COLUMN_NAME_TIMESTAMP)
|
||||||
assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
|
assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
|
||||||
assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TS].GetInt64Value())
|
assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TIMESTAMP].GetInt64Value())
|
||||||
assert.Equal(t, []byte("nil-fields-key"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
|
assert.Equal(t, []byte("nil-fields-key"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1058,9 +1058,9 @@ func TestSQLEngine_ConvertLogEntryToRecordValue_SystemColumnOverride(t *testing.
|
|||||||
// Create RecordValue that already has system column names (should be overridden)
|
// Create RecordValue that already has system column names (should be overridden)
|
||||||
recordWithSystemCols := &schema_pb.RecordValue{
|
recordWithSystemCols := &schema_pb.RecordValue{
|
||||||
Fields: map[string]*schema_pb.Value{
|
Fields: map[string]*schema_pb.Value{
|
||||||
"user_field": {Kind: &schema_pb.Value_StringValue{StringValue: "user-data"}},
|
"user_field": {Kind: &schema_pb.Value_StringValue{StringValue: "user-data"}},
|
||||||
SW_COLUMN_NAME_TS: {Kind: &schema_pb.Value_Int64Value{Int64Value: 999999999}}, // Should be overridden
|
SW_COLUMN_NAME_TIMESTAMP: {Kind: &schema_pb.Value_Int64Value{Int64Value: 999999999}}, // Should be overridden
|
||||||
SW_COLUMN_NAME_KEY: {Kind: &schema_pb.Value_StringValue{StringValue: "old-key"}}, // Should be overridden
|
SW_COLUMN_NAME_KEY: {Kind: &schema_pb.Value_StringValue{StringValue: "old-key"}}, // Should be overridden
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
protobufData, err := proto.Marshal(recordWithSystemCols)
|
protobufData, err := proto.Marshal(recordWithSystemCols)
|
||||||
@@ -1082,7 +1082,7 @@ func TestSQLEngine_ConvertLogEntryToRecordValue_SystemColumnOverride(t *testing.
|
|||||||
assert.NotNil(t, result)
|
assert.NotNil(t, result)
|
||||||
|
|
||||||
// System columns should use LogEntry values, not protobuf values
|
// System columns should use LogEntry values, not protobuf values
|
||||||
assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TS].GetInt64Value())
|
assert.Equal(t, int64(1609459200000000000), result.Fields[SW_COLUMN_NAME_TIMESTAMP].GetInt64Value())
|
||||||
assert.Equal(t, []byte("actual-key"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
|
assert.Equal(t, []byte("actual-key"), result.Fields[SW_COLUMN_NAME_KEY].GetBytesValue())
|
||||||
|
|
||||||
// User field should be preserved
|
// User field should be preserved
|
||||||
@@ -1133,7 +1133,7 @@ func TestSQLEngine_ConvertLogEntryToRecordValue_ComplexDataTypes(t *testing.T) {
|
|||||||
assert.Equal(t, []byte{0x01, 0x02, 0x03}, result.Fields["bytes_field"].GetBytesValue())
|
assert.Equal(t, []byte{0x01, 0x02, 0x03}, result.Fields["bytes_field"].GetBytesValue())
|
||||||
|
|
||||||
// System columns should still be present
|
// System columns should still be present
|
||||||
assert.Contains(t, result.Fields, SW_COLUMN_NAME_TS)
|
assert.Contains(t, result.Fields, SW_COLUMN_NAME_TIMESTAMP)
|
||||||
assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
|
assert.Contains(t, result.Fields, SW_COLUMN_NAME_KEY)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -74,7 +74,7 @@ func NewHybridMessageScanner(filerClient filer_pb.FilerClient, brokerClient Brok
|
|||||||
|
|
||||||
// Add system columns that MQ adds to all records
|
// Add system columns that MQ adds to all records
|
||||||
recordType = schema.NewRecordTypeBuilder(recordTypeCopy).
|
recordType = schema.NewRecordTypeBuilder(recordTypeCopy).
|
||||||
WithField(SW_COLUMN_NAME_TS, schema.TypeInt64).
|
WithField(SW_COLUMN_NAME_TIMESTAMP, schema.TypeInt64).
|
||||||
WithField(SW_COLUMN_NAME_KEY, schema.TypeBytes).
|
WithField(SW_COLUMN_NAME_KEY, schema.TypeBytes).
|
||||||
RecordTypeEnd()
|
RecordTypeEnd()
|
||||||
|
|
||||||
@@ -328,7 +328,7 @@ func (hms *HybridMessageScanner) scanUnflushedDataWithStats(ctx context.Context,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Extract system columns for result
|
// Extract system columns for result
|
||||||
timestamp := recordValue.Fields[SW_COLUMN_NAME_TS].GetInt64Value()
|
timestamp := recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP].GetInt64Value()
|
||||||
key := recordValue.Fields[SW_COLUMN_NAME_KEY].GetBytesValue()
|
key := recordValue.Fields[SW_COLUMN_NAME_KEY].GetBytesValue()
|
||||||
|
|
||||||
// Apply column projection
|
// Apply column projection
|
||||||
@@ -336,7 +336,7 @@ func (hms *HybridMessageScanner) scanUnflushedDataWithStats(ctx context.Context,
|
|||||||
if len(options.Columns) == 0 {
|
if len(options.Columns) == 0 {
|
||||||
// Select all columns (excluding system columns from user view)
|
// Select all columns (excluding system columns from user view)
|
||||||
for name, value := range recordValue.Fields {
|
for name, value := range recordValue.Fields {
|
||||||
if name != SW_COLUMN_NAME_TS && name != SW_COLUMN_NAME_KEY {
|
if name != SW_COLUMN_NAME_TIMESTAMP && name != SW_COLUMN_NAME_KEY {
|
||||||
values[name] = value
|
values[name] = value
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -354,7 +354,7 @@ func (hms *HybridMessageScanner) scanUnflushedDataWithStats(ctx context.Context,
|
|||||||
Values: values,
|
Values: values,
|
||||||
Timestamp: timestamp,
|
Timestamp: timestamp,
|
||||||
Key: key,
|
Key: key,
|
||||||
Source: "in_memory_broker", // Tag for debugging/analysis
|
Source: "live_log", // Data from broker's unflushed messages
|
||||||
}
|
}
|
||||||
|
|
||||||
results = append(results, result)
|
results = append(results, result)
|
||||||
@@ -386,7 +386,7 @@ func (hms *HybridMessageScanner) convertDataMessageToRecord(msg *mq_pb.DataMessa
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add timestamp
|
// Add timestamp
|
||||||
recordValue.Fields[SW_COLUMN_NAME_TS] = &schema_pb.Value{
|
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{
|
||||||
Kind: &schema_pb.Value_Int64Value{Int64Value: msg.TsNs},
|
Kind: &schema_pb.Value_Int64Value{Int64Value: msg.TsNs},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -521,14 +521,6 @@ func (hms *HybridMessageScanner) scanPartitionHybridWithStats(ctx context.Contex
|
|||||||
results = mergedResults
|
results = mergedResults
|
||||||
}
|
}
|
||||||
|
|
||||||
// STEP 4: Fallback to sample data if no results found
|
|
||||||
// STEP 4: Fallback to sample data if no results found
|
|
||||||
// if len(results) == 0 {
|
|
||||||
// sampleResults := hms.generateSampleHybridData(options)
|
|
||||||
// results = append(results, sampleResults...)
|
|
||||||
// // Note: OFFSET and LIMIT will be applied at the end of the main scan function
|
|
||||||
// }
|
|
||||||
|
|
||||||
return results, stats, nil
|
return results, stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -595,7 +587,7 @@ func (hms *HybridMessageScanner) convertLogEntryToRecordValue(logEntry *filer_pb
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add system columns from LogEntry
|
// Add system columns from LogEntry
|
||||||
recordValue.Fields[SW_COLUMN_NAME_TS] = &schema_pb.Value{
|
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{
|
||||||
Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs},
|
Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs},
|
||||||
}
|
}
|
||||||
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{
|
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{
|
||||||
@@ -617,7 +609,7 @@ func (hms *HybridMessageScanner) parseRawMessageWithSchema(logEntry *filer_pb.Lo
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add system columns (always present)
|
// Add system columns (always present)
|
||||||
recordValue.Fields[SW_COLUMN_NAME_TS] = &schema_pb.Value{
|
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{
|
||||||
Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs},
|
Kind: &schema_pb.Value_Int64Value{Int64Value: logEntry.TsNs},
|
||||||
}
|
}
|
||||||
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{
|
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{
|
||||||
@@ -862,11 +854,11 @@ func (hms *HybridMessageScanner) ConvertToSQLResult(results []HybridScanResult,
|
|||||||
row := make([]sqltypes.Value, len(columns))
|
row := make([]sqltypes.Value, len(columns))
|
||||||
for j, columnName := range columns {
|
for j, columnName := range columns {
|
||||||
switch columnName {
|
switch columnName {
|
||||||
case "_source":
|
case SW_COLUMN_NAME_SOURCE:
|
||||||
row[j] = sqltypes.NewVarChar(result.Source)
|
row[j] = sqltypes.NewVarChar(result.Source)
|
||||||
case "_timestamp_ns":
|
case SW_COLUMN_NAME_TIMESTAMP:
|
||||||
row[j] = sqltypes.NewInt64(result.Timestamp)
|
row[j] = sqltypes.NewInt64(result.Timestamp)
|
||||||
case "_key":
|
case SW_COLUMN_NAME_KEY:
|
||||||
row[j] = sqltypes.NewVarBinary(string(result.Key))
|
row[j] = sqltypes.NewVarBinary(string(result.Key))
|
||||||
default:
|
default:
|
||||||
if value, exists := result.Values[columnName]; exists {
|
if value, exists := result.Values[columnName]; exists {
|
||||||
@@ -887,78 +879,89 @@ func (hms *HybridMessageScanner) ConvertToSQLResult(results []HybridScanResult,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// generateSampleHybridData creates sample data that simulates both live and archived messages
|
// ConvertToSQLResultWithMixedColumns handles SELECT *, specific_columns queries
|
||||||
func (hms *HybridMessageScanner) generateSampleHybridData(options HybridScanOptions) []HybridScanResult {
|
// Combines auto-discovered columns (from *) with explicitly requested columns
|
||||||
now := time.Now().UnixNano()
|
func (hms *HybridMessageScanner) ConvertToSQLResultWithMixedColumns(results []HybridScanResult, explicitColumns []string) *QueryResult {
|
||||||
|
if len(results) == 0 {
|
||||||
|
// For empty results, combine auto-discovered columns with explicit ones
|
||||||
|
columnSet := make(map[string]bool)
|
||||||
|
|
||||||
sampleData := []HybridScanResult{
|
// Add explicit columns first
|
||||||
// Simulated live log data (recent)
|
for _, col := range explicitColumns {
|
||||||
{
|
columnSet[col] = true
|
||||||
Values: map[string]*schema_pb.Value{
|
}
|
||||||
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1003}},
|
|
||||||
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "live_login"}},
|
|
||||||
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"ip": "10.0.0.1", "live": true}`}},
|
|
||||||
},
|
|
||||||
Timestamp: now - 300000000000, // 5 minutes ago
|
|
||||||
Key: []byte("live-user-1003"),
|
|
||||||
Source: "live_log",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Values: map[string]*schema_pb.Value{
|
|
||||||
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1004}},
|
|
||||||
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "live_action"}},
|
|
||||||
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"action": "click", "live": true}`}},
|
|
||||||
},
|
|
||||||
Timestamp: now - 120000000000, // 2 minutes ago
|
|
||||||
Key: []byte("live-user-1004"),
|
|
||||||
Source: "live_log",
|
|
||||||
},
|
|
||||||
|
|
||||||
// Simulated archived Parquet data (older)
|
// Build final column list
|
||||||
{
|
columns := make([]string, 0, len(columnSet))
|
||||||
Values: map[string]*schema_pb.Value{
|
for col := range columnSet {
|
||||||
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
|
columns = append(columns, col)
|
||||||
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "archived_login"}},
|
}
|
||||||
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"ip": "192.168.1.1", "archived": true}`}},
|
|
||||||
},
|
return &QueryResult{
|
||||||
Timestamp: now - 3600000000000, // 1 hour ago
|
Columns: columns,
|
||||||
Key: []byte("archived-user-1001"),
|
Rows: [][]sqltypes.Value{},
|
||||||
Source: "parquet_archive",
|
Database: hms.topic.Namespace,
|
||||||
},
|
Table: hms.topic.Name,
|
||||||
{
|
}
|
||||||
Values: map[string]*schema_pb.Value{
|
|
||||||
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1002}},
|
|
||||||
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "archived_logout"}},
|
|
||||||
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"duration": 1800, "archived": true}`}},
|
|
||||||
},
|
|
||||||
Timestamp: now - 1800000000000, // 30 minutes ago
|
|
||||||
Key: []byte("archived-user-1002"),
|
|
||||||
Source: "parquet_archive",
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply predicate filtering if specified
|
// Auto-discover columns from data (like SELECT *)
|
||||||
if options.Predicate != nil {
|
autoColumns := make(map[string]bool)
|
||||||
var filtered []HybridScanResult
|
for _, result := range results {
|
||||||
for _, result := range sampleData {
|
for columnName := range result.Values {
|
||||||
// Convert to RecordValue for predicate testing
|
autoColumns[columnName] = true
|
||||||
recordValue := &schema_pb.RecordValue{Fields: make(map[string]*schema_pb.Value)}
|
}
|
||||||
for k, v := range result.Values {
|
}
|
||||||
recordValue.Fields[k] = v
|
|
||||||
}
|
|
||||||
recordValue.Fields[SW_COLUMN_NAME_TS] = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: result.Timestamp}}
|
|
||||||
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: result.Key}}
|
|
||||||
|
|
||||||
if options.Predicate(recordValue) {
|
// Combine auto-discovered and explicit columns
|
||||||
filtered = append(filtered, result)
|
columnSet := make(map[string]bool)
|
||||||
|
|
||||||
|
// Add auto-discovered columns first (regular data columns)
|
||||||
|
for col := range autoColumns {
|
||||||
|
columnSet[col] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add explicit columns (may include system columns like _source)
|
||||||
|
for _, col := range explicitColumns {
|
||||||
|
columnSet[col] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build final column list
|
||||||
|
columns := make([]string, 0, len(columnSet))
|
||||||
|
for col := range columnSet {
|
||||||
|
columns = append(columns, col)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to SQL rows
|
||||||
|
rows := make([][]sqltypes.Value, len(results))
|
||||||
|
for i, result := range results {
|
||||||
|
row := make([]sqltypes.Value, len(columns))
|
||||||
|
for j, columnName := range columns {
|
||||||
|
switch columnName {
|
||||||
|
case SW_COLUMN_NAME_TIMESTAMP:
|
||||||
|
row[j] = sqltypes.NewInt64(result.Timestamp)
|
||||||
|
case SW_COLUMN_NAME_KEY:
|
||||||
|
row[j] = sqltypes.NewVarBinary(string(result.Key))
|
||||||
|
case SW_COLUMN_NAME_SOURCE:
|
||||||
|
row[j] = sqltypes.NewVarChar(result.Source)
|
||||||
|
default:
|
||||||
|
// Regular data column
|
||||||
|
if value, exists := result.Values[columnName]; exists {
|
||||||
|
row[j] = convertSchemaValueToSQL(value)
|
||||||
|
} else {
|
||||||
|
row[j] = sqltypes.NULL
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sampleData = filtered
|
rows[i] = row
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note: OFFSET and LIMIT will be applied at the end of the main scan function
|
return &QueryResult{
|
||||||
|
Columns: columns,
|
||||||
return sampleData
|
Rows: rows,
|
||||||
|
Database: hms.topic.Namespace,
|
||||||
|
Table: hms.topic.Name,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ReadParquetStatistics efficiently reads column statistics from parquet files
|
// ReadParquetStatistics efficiently reads column statistics from parquet files
|
||||||
@@ -1428,7 +1431,7 @@ func (s *StreamingFlushedDataSource) startStreaming() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Extract system columns
|
// Extract system columns
|
||||||
timestamp := recordValue.Fields[SW_COLUMN_NAME_TS].GetInt64Value()
|
timestamp := recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP].GetInt64Value()
|
||||||
key := recordValue.Fields[SW_COLUMN_NAME_KEY].GetBytesValue()
|
key := recordValue.Fields[SW_COLUMN_NAME_KEY].GetBytesValue()
|
||||||
|
|
||||||
// Apply column projection
|
// Apply column projection
|
||||||
@@ -1436,7 +1439,7 @@ func (s *StreamingFlushedDataSource) startStreaming() {
|
|||||||
if len(s.options.Columns) == 0 {
|
if len(s.options.Columns) == 0 {
|
||||||
// Select all columns (excluding system columns from user view)
|
// Select all columns (excluding system columns from user view)
|
||||||
for name, value := range recordValue.Fields {
|
for name, value := range recordValue.Fields {
|
||||||
if name != SW_COLUMN_NAME_TS && name != SW_COLUMN_NAME_KEY {
|
if name != SW_COLUMN_NAME_TIMESTAMP && name != SW_COLUMN_NAME_KEY {
|
||||||
values[name] = value
|
values[name] = value
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -24,20 +24,17 @@ func TestSQLEngine_HybridSelectBasic(t *testing.T) {
|
|||||||
t.Error("Expected columns in result")
|
t.Error("Expected columns in result")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// In mock environment, we only get live_log data from unflushed messages
|
||||||
|
// parquet_archive data would come from parquet files in a real system
|
||||||
if len(result.Rows) == 0 {
|
if len(result.Rows) == 0 {
|
||||||
t.Error("Expected rows in result")
|
t.Error("Expected rows in result")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Should have both live and archived data (4 sample records)
|
|
||||||
if len(result.Rows) != 4 {
|
|
||||||
t.Errorf("Expected 4 rows (2 live + 2 archived), got %d", len(result.Rows))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check that we have the _source column showing data source
|
// Check that we have the _source column showing data source
|
||||||
hasSourceColumn := false
|
hasSourceColumn := false
|
||||||
sourceColumnIndex := -1
|
sourceColumnIndex := -1
|
||||||
for i, column := range result.Columns {
|
for i, column := range result.Columns {
|
||||||
if column == "_source" {
|
if column == SW_COLUMN_NAME_SOURCE {
|
||||||
hasSourceColumn = true
|
hasSourceColumn = true
|
||||||
sourceColumnIndex = i
|
sourceColumnIndex = i
|
||||||
break
|
break
|
||||||
@@ -48,19 +45,18 @@ func TestSQLEngine_HybridSelectBasic(t *testing.T) {
|
|||||||
t.Skip("_source column not available in fallback mode - test requires real SeaweedFS cluster")
|
t.Skip("_source column not available in fallback mode - test requires real SeaweedFS cluster")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify we have both data sources
|
// Verify we have the expected data sources (in mock environment, only live_log)
|
||||||
if hasSourceColumn && sourceColumnIndex >= 0 {
|
if hasSourceColumn && sourceColumnIndex >= 0 {
|
||||||
foundLiveLog := false
|
foundLiveLog := false
|
||||||
foundParquetArchive := false
|
|
||||||
|
|
||||||
for _, row := range result.Rows {
|
for _, row := range result.Rows {
|
||||||
if sourceColumnIndex < len(row) {
|
if sourceColumnIndex < len(row) {
|
||||||
source := row[sourceColumnIndex].ToString()
|
source := row[sourceColumnIndex].ToString()
|
||||||
if source == "live_log" {
|
if source == "live_log" {
|
||||||
foundLiveLog = true
|
foundLiveLog = true
|
||||||
} else if source == "parquet_archive" {
|
|
||||||
foundParquetArchive = true
|
|
||||||
}
|
}
|
||||||
|
// In mock environment, all data comes from unflushed messages (live_log)
|
||||||
|
// In a real system, we would also see parquet_archive from parquet files
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -68,11 +64,7 @@ func TestSQLEngine_HybridSelectBasic(t *testing.T) {
|
|||||||
t.Error("Expected to find live_log data source in results")
|
t.Error("Expected to find live_log data source in results")
|
||||||
}
|
}
|
||||||
|
|
||||||
if !foundParquetArchive {
|
t.Logf("Found live_log data source from unflushed messages")
|
||||||
t.Error("Expected to find parquet_archive data source in results")
|
|
||||||
}
|
|
||||||
|
|
||||||
t.Logf("Found both live_log and parquet_archive data sources")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -8,6 +8,7 @@ import (
|
|||||||
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
||||||
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
|
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
|
||||||
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
|
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
|
||||||
|
"google.golang.org/protobuf/proto"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewTestSchemaCatalog creates a schema catalog for testing with sample data
|
// NewTestSchemaCatalog creates a schema catalog for testing with sample data
|
||||||
@@ -21,10 +22,63 @@ func NewTestSchemaCatalog() *SchemaCatalog {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Pre-populate with sample data to avoid service discovery requirements
|
// Pre-populate with sample data to avoid service discovery requirements
|
||||||
catalog.initSampleData()
|
initTestSampleData(catalog)
|
||||||
return catalog
|
return catalog
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// initTestSampleData populates the catalog with sample schema data for testing
|
||||||
|
// This function is only available in test builds and not in production
|
||||||
|
func initTestSampleData(c *SchemaCatalog) {
|
||||||
|
// Create sample databases and tables
|
||||||
|
c.databases["default"] = &DatabaseInfo{
|
||||||
|
Name: "default",
|
||||||
|
Tables: map[string]*TableInfo{
|
||||||
|
"user_events": {
|
||||||
|
Name: "user_events",
|
||||||
|
Columns: []ColumnInfo{
|
||||||
|
{Name: "user_id", Type: "VARCHAR(100)", Nullable: true},
|
||||||
|
{Name: "event_type", Type: "VARCHAR(50)", Nullable: true},
|
||||||
|
{Name: "data", Type: "TEXT", Nullable: true},
|
||||||
|
// System columns - hidden by default in SELECT *
|
||||||
|
{Name: SW_COLUMN_NAME_TIMESTAMP, Type: "BIGINT", Nullable: false},
|
||||||
|
{Name: SW_COLUMN_NAME_KEY, Type: "VARCHAR(255)", Nullable: true},
|
||||||
|
{Name: SW_COLUMN_NAME_SOURCE, Type: "VARCHAR(50)", Nullable: false},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"system_logs": {
|
||||||
|
Name: "system_logs",
|
||||||
|
Columns: []ColumnInfo{
|
||||||
|
{Name: "level", Type: "VARCHAR(10)", Nullable: true},
|
||||||
|
{Name: "message", Type: "TEXT", Nullable: true},
|
||||||
|
{Name: "service", Type: "VARCHAR(50)", Nullable: true},
|
||||||
|
// System columns
|
||||||
|
{Name: SW_COLUMN_NAME_TIMESTAMP, Type: "BIGINT", Nullable: false},
|
||||||
|
{Name: SW_COLUMN_NAME_KEY, Type: "VARCHAR(255)", Nullable: true},
|
||||||
|
{Name: SW_COLUMN_NAME_SOURCE, Type: "VARCHAR(50)", Nullable: false},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
c.databases["test"] = &DatabaseInfo{
|
||||||
|
Name: "test",
|
||||||
|
Tables: map[string]*TableInfo{
|
||||||
|
"test-topic": {
|
||||||
|
Name: "test-topic",
|
||||||
|
Columns: []ColumnInfo{
|
||||||
|
{Name: "id", Type: "INT", Nullable: true},
|
||||||
|
{Name: "name", Type: "VARCHAR(100)", Nullable: true},
|
||||||
|
{Name: "value", Type: "DOUBLE", Nullable: true},
|
||||||
|
// System columns
|
||||||
|
{Name: SW_COLUMN_NAME_TIMESTAMP, Type: "BIGINT", Nullable: false},
|
||||||
|
{Name: SW_COLUMN_NAME_KEY, Type: "VARCHAR(255)", Nullable: true},
|
||||||
|
{Name: SW_COLUMN_NAME_SOURCE, Type: "VARCHAR(50)", Nullable: false},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// NewTestSQLEngine creates a new SQL execution engine for testing
|
// NewTestSQLEngine creates a new SQL execution engine for testing
|
||||||
// Does not attempt to connect to real SeaweedFS services
|
// Does not attempt to connect to real SeaweedFS services
|
||||||
func NewTestSQLEngine() *SQLEngine {
|
func NewTestSQLEngine() *SQLEngine {
|
||||||
@@ -225,22 +279,44 @@ func (m *MockBrokerClient) DeleteTopic(ctx context.Context, namespace, topicName
|
|||||||
}
|
}
|
||||||
|
|
||||||
// GetUnflushedMessages returns mock unflushed data for testing
|
// GetUnflushedMessages returns mock unflushed data for testing
|
||||||
// Always returns empty slice to simulate safe deduplication behavior
|
// Returns sample data as LogEntries to provide test data for SQL engine
|
||||||
func (m *MockBrokerClient) GetUnflushedMessages(ctx context.Context, namespace, topicName string, partition topic.Partition, startTimeNs int64) ([]*filer_pb.LogEntry, error) {
|
func (m *MockBrokerClient) GetUnflushedMessages(ctx context.Context, namespace, topicName string, partition topic.Partition, startTimeNs int64) ([]*filer_pb.LogEntry, error) {
|
||||||
if m.shouldFail {
|
if m.shouldFail {
|
||||||
return nil, fmt.Errorf("mock broker failed to get unflushed messages: %s", m.failMessage)
|
return nil, fmt.Errorf("mock broker failed to get unflushed messages: %s", m.failMessage)
|
||||||
}
|
}
|
||||||
|
|
||||||
// For testing, return empty slice to simulate:
|
// Generate sample data as LogEntries for testing
|
||||||
// 1. No unflushed data available
|
// This provides data that looks like it came from the broker's memory buffer
|
||||||
// 2. Safe deduplication behavior (prevents double-counting)
|
allSampleData := generateSampleHybridData(topicName, HybridScanOptions{})
|
||||||
// 3. Successful broker communication
|
|
||||||
//
|
|
||||||
// In a real implementation, this would:
|
|
||||||
// - Connect to actual broker
|
|
||||||
// - Access LocalPartition's LogBuffer
|
|
||||||
// - Use buffer_start metadata for deduplication
|
|
||||||
// - Return only truly unflushed messages
|
|
||||||
|
|
||||||
return []*filer_pb.LogEntry{}, nil
|
var logEntries []*filer_pb.LogEntry
|
||||||
|
for _, result := range allSampleData {
|
||||||
|
// Only return live_log entries as unflushed messages
|
||||||
|
// This matches real system behavior where unflushed messages come from broker memory
|
||||||
|
// parquet_archive data would come from parquet files, not unflushed messages
|
||||||
|
if result.Source != "live_log" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert sample data to protobuf LogEntry format
|
||||||
|
recordValue := &schema_pb.RecordValue{Fields: make(map[string]*schema_pb.Value)}
|
||||||
|
for k, v := range result.Values {
|
||||||
|
recordValue.Fields[k] = v
|
||||||
|
}
|
||||||
|
|
||||||
|
// Serialize the RecordValue
|
||||||
|
data, err := proto.Marshal(recordValue)
|
||||||
|
if err != nil {
|
||||||
|
continue // Skip invalid entries
|
||||||
|
}
|
||||||
|
|
||||||
|
logEntry := &filer_pb.LogEntry{
|
||||||
|
TsNs: result.Timestamp,
|
||||||
|
Key: result.Key,
|
||||||
|
Data: data,
|
||||||
|
}
|
||||||
|
logEntries = append(logEntries, logEntry)
|
||||||
|
}
|
||||||
|
|
||||||
|
return logEntries, nil
|
||||||
}
|
}
|
||||||
|
@@ -16,17 +16,11 @@ import (
|
|||||||
"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"
|
"github.com/seaweedfs/seaweedfs/weed/util/chunk_cache"
|
||||||
)
|
)
|
||||||
|
|
||||||
// System columns added to all MQ records
|
|
||||||
const (
|
|
||||||
SW_COLUMN_NAME_TS = "_ts_ns" // Timestamp in nanoseconds
|
|
||||||
SW_COLUMN_NAME_KEY = "_key" // Message key
|
|
||||||
)
|
|
||||||
|
|
||||||
// ParquetScanner scans MQ topic Parquet files for SELECT queries
|
// ParquetScanner scans MQ topic Parquet files for SELECT queries
|
||||||
// Assumptions:
|
// Assumptions:
|
||||||
// 1. All MQ messages are stored in Parquet format in topic partitions
|
// 1. All MQ messages are stored in Parquet format in topic partitions
|
||||||
// 2. Each partition directory contains dated Parquet files
|
// 2. Each partition directory contains dated Parquet files
|
||||||
// 3. System columns (_ts_ns, _key) are added to user schema
|
// 3. System columns (_timestamp_ns, _key) are added to user schema
|
||||||
// 4. Predicate pushdown is used for efficient scanning
|
// 4. Predicate pushdown is used for efficient scanning
|
||||||
type ParquetScanner struct {
|
type ParquetScanner struct {
|
||||||
filerClient filer_pb.FilerClient
|
filerClient filer_pb.FilerClient
|
||||||
@@ -68,7 +62,7 @@ func NewParquetScanner(filerClient filer_pb.FilerClient, namespace, topicName st
|
|||||||
|
|
||||||
// Add system columns that MQ adds to all records
|
// Add system columns that MQ adds to all records
|
||||||
recordType = schema.NewRecordTypeBuilder(recordType).
|
recordType = schema.NewRecordTypeBuilder(recordType).
|
||||||
WithField(SW_COLUMN_NAME_TS, schema.TypeInt64).
|
WithField(SW_COLUMN_NAME_TIMESTAMP, schema.TypeInt64).
|
||||||
WithField(SW_COLUMN_NAME_KEY, schema.TypeBytes).
|
WithField(SW_COLUMN_NAME_KEY, schema.TypeBytes).
|
||||||
RecordTypeEnd()
|
RecordTypeEnd()
|
||||||
|
|
||||||
@@ -196,7 +190,7 @@ func (ps *ParquetScanner) scanParquetFile(ctx context.Context, entry *filer_pb.E
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Extract system columns
|
// Extract system columns
|
||||||
timestamp := recordValue.Fields[SW_COLUMN_NAME_TS].GetInt64Value()
|
timestamp := recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP].GetInt64Value()
|
||||||
key := recordValue.Fields[SW_COLUMN_NAME_KEY].GetBytesValue()
|
key := recordValue.Fields[SW_COLUMN_NAME_KEY].GetBytesValue()
|
||||||
|
|
||||||
// Apply time filtering
|
// Apply time filtering
|
||||||
@@ -217,7 +211,7 @@ func (ps *ParquetScanner) scanParquetFile(ctx context.Context, entry *filer_pb.E
|
|||||||
if len(options.Columns) == 0 {
|
if len(options.Columns) == 0 {
|
||||||
// Select all columns (excluding system columns from user view)
|
// Select all columns (excluding system columns from user view)
|
||||||
for name, value := range recordValue.Fields {
|
for name, value := range recordValue.Fields {
|
||||||
if name != SW_COLUMN_NAME_TS && name != SW_COLUMN_NAME_KEY {
|
if name != SW_COLUMN_NAME_TIMESTAMP && name != SW_COLUMN_NAME_KEY {
|
||||||
values[name] = value
|
values[name] = value
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -293,7 +287,7 @@ func (ps *ParquetScanner) generateSampleData(options ScanOptions) []ScanResult {
|
|||||||
for k, v := range result.Values {
|
for k, v := range result.Values {
|
||||||
recordValue.Fields[k] = v
|
recordValue.Fields[k] = v
|
||||||
}
|
}
|
||||||
recordValue.Fields[SW_COLUMN_NAME_TS] = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: result.Timestamp}}
|
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: result.Timestamp}}
|
||||||
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: result.Key}}
|
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: result.Key}}
|
||||||
|
|
||||||
if options.Predicate(recordValue) {
|
if options.Predicate(recordValue) {
|
||||||
|
@@ -34,9 +34,10 @@ func TestSQLEngine_SelectBasic(t *testing.T) {
|
|||||||
t.Errorf("Expected %d columns, got %d", len(expectedColumns), len(result.Columns))
|
t.Errorf("Expected %d columns, got %d", len(expectedColumns), len(result.Columns))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Should have 4 sample rows (hybrid data includes both live_log and parquet_archive)
|
// In mock environment, only live_log data from unflushed messages
|
||||||
if len(result.Rows) != 4 {
|
// parquet_archive data would come from parquet files in a real system
|
||||||
t.Errorf("Expected 4 rows, got %d", len(result.Rows))
|
if len(result.Rows) == 0 {
|
||||||
|
t.Error("Expected rows in result")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -4,12 +4,19 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// System column constants used throughout the SQL engine
|
||||||
|
const (
|
||||||
|
SW_COLUMN_NAME_TIMESTAMP = "_timestamp_ns" // Message timestamp in nanoseconds
|
||||||
|
SW_COLUMN_NAME_KEY = "_key" // Message key
|
||||||
|
SW_COLUMN_NAME_SOURCE = "_source" // Data source (live_log, parquet_archive, etc.)
|
||||||
|
)
|
||||||
|
|
||||||
// isSystemColumn checks if a column is a system column (_timestamp_ns, _key, _source)
|
// isSystemColumn checks if a column is a system column (_timestamp_ns, _key, _source)
|
||||||
func (e *SQLEngine) isSystemColumn(columnName string) bool {
|
func (e *SQLEngine) isSystemColumn(columnName string) bool {
|
||||||
lowerName := strings.ToLower(columnName)
|
lowerName := strings.ToLower(columnName)
|
||||||
return lowerName == "_timestamp_ns" || lowerName == "timestamp_ns" ||
|
return lowerName == SW_COLUMN_NAME_TIMESTAMP ||
|
||||||
lowerName == "_key" || lowerName == "key" ||
|
lowerName == SW_COLUMN_NAME_KEY ||
|
||||||
lowerName == "_source" || lowerName == "source"
|
lowerName == SW_COLUMN_NAME_SOURCE
|
||||||
}
|
}
|
||||||
|
|
||||||
// isRegularColumn checks if a column might be a regular data column (placeholder)
|
// isRegularColumn checks if a column might be a regular data column (placeholder)
|
||||||
@@ -23,7 +30,7 @@ func (e *SQLEngine) getSystemColumnGlobalMin(columnName string, allFileStats map
|
|||||||
lowerName := strings.ToLower(columnName)
|
lowerName := strings.ToLower(columnName)
|
||||||
|
|
||||||
switch lowerName {
|
switch lowerName {
|
||||||
case "_timestamp_ns", "timestamp_ns":
|
case SW_COLUMN_NAME_TIMESTAMP:
|
||||||
// For timestamps, find the earliest timestamp across all files
|
// For timestamps, find the earliest timestamp across all files
|
||||||
// This should match what's in the Extended["min"] metadata
|
// This should match what's in the Extended["min"] metadata
|
||||||
var minTimestamp *int64
|
var minTimestamp *int64
|
||||||
@@ -42,12 +49,12 @@ func (e *SQLEngine) getSystemColumnGlobalMin(columnName string, allFileStats map
|
|||||||
return *minTimestamp
|
return *minTimestamp
|
||||||
}
|
}
|
||||||
|
|
||||||
case "_key", "key":
|
case SW_COLUMN_NAME_KEY:
|
||||||
// For keys, we'd need to read the actual parquet column stats
|
// For keys, we'd need to read the actual parquet column stats
|
||||||
// Fall back to scanning if not available in our current stats
|
// Fall back to scanning if not available in our current stats
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
case "_source", "source":
|
case SW_COLUMN_NAME_SOURCE:
|
||||||
// Source is always "parquet_archive" for parquet files
|
// Source is always "parquet_archive" for parquet files
|
||||||
return "parquet_archive"
|
return "parquet_archive"
|
||||||
}
|
}
|
||||||
@@ -60,7 +67,7 @@ func (e *SQLEngine) getSystemColumnGlobalMax(columnName string, allFileStats map
|
|||||||
lowerName := strings.ToLower(columnName)
|
lowerName := strings.ToLower(columnName)
|
||||||
|
|
||||||
switch lowerName {
|
switch lowerName {
|
||||||
case "_timestamp_ns", "timestamp_ns":
|
case SW_COLUMN_NAME_TIMESTAMP:
|
||||||
// For timestamps, find the latest timestamp across all files
|
// For timestamps, find the latest timestamp across all files
|
||||||
// This should match what's in the Extended["max"] metadata
|
// This should match what's in the Extended["max"] metadata
|
||||||
var maxTimestamp *int64
|
var maxTimestamp *int64
|
||||||
@@ -79,12 +86,12 @@ func (e *SQLEngine) getSystemColumnGlobalMax(columnName string, allFileStats map
|
|||||||
return *maxTimestamp
|
return *maxTimestamp
|
||||||
}
|
}
|
||||||
|
|
||||||
case "_key", "key":
|
case SW_COLUMN_NAME_KEY:
|
||||||
// For keys, we'd need to read the actual parquet column stats
|
// For keys, we'd need to read the actual parquet column stats
|
||||||
// Fall back to scanning if not available in our current stats
|
// Fall back to scanning if not available in our current stats
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
case "_source", "source":
|
case SW_COLUMN_NAME_SOURCE:
|
||||||
// Source is always "parquet_archive" for parquet files
|
// Source is always "parquet_archive" for parquet files
|
||||||
return "parquet_archive"
|
return "parquet_archive"
|
||||||
}
|
}
|
||||||
|
137
weed/query/engine/test_sample_data_test.go
Normal file
137
weed/query/engine/test_sample_data_test.go
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
package engine
|
||||||
|
|
||||||
|
import (
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/seaweedfs/seaweedfs/weed/pb/schema_pb"
|
||||||
|
)
|
||||||
|
|
||||||
|
// generateSampleHybridData creates sample data that simulates both live and archived messages
|
||||||
|
// This function is only used for testing and is not included in production builds
|
||||||
|
func generateSampleHybridData(topicName string, options HybridScanOptions) []HybridScanResult {
|
||||||
|
now := time.Now().UnixNano()
|
||||||
|
|
||||||
|
// Generate different sample data based on topic name
|
||||||
|
var sampleData []HybridScanResult
|
||||||
|
|
||||||
|
switch topicName {
|
||||||
|
case "user_events":
|
||||||
|
sampleData = []HybridScanResult{
|
||||||
|
// Simulated live log data (recent)
|
||||||
|
{
|
||||||
|
Values: map[string]*schema_pb.Value{
|
||||||
|
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1003}},
|
||||||
|
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "live_login"}},
|
||||||
|
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"ip": "10.0.0.1", "live": true}`}},
|
||||||
|
},
|
||||||
|
Timestamp: now - 300000000000, // 5 minutes ago
|
||||||
|
Key: []byte("live-user-1003"),
|
||||||
|
Source: "live_log",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Values: map[string]*schema_pb.Value{
|
||||||
|
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1004}},
|
||||||
|
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "live_action"}},
|
||||||
|
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"action": "click", "live": true}`}},
|
||||||
|
},
|
||||||
|
Timestamp: now - 120000000000, // 2 minutes ago
|
||||||
|
Key: []byte("live-user-1004"),
|
||||||
|
Source: "live_log",
|
||||||
|
},
|
||||||
|
|
||||||
|
// Simulated archived Parquet data (older)
|
||||||
|
{
|
||||||
|
Values: map[string]*schema_pb.Value{
|
||||||
|
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1001}},
|
||||||
|
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "archived_login"}},
|
||||||
|
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"ip": "192.168.1.1", "archived": true}`}},
|
||||||
|
},
|
||||||
|
Timestamp: now - 3600000000000, // 1 hour ago
|
||||||
|
Key: []byte("archived-user-1001"),
|
||||||
|
Source: "parquet_archive",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Values: map[string]*schema_pb.Value{
|
||||||
|
"user_id": {Kind: &schema_pb.Value_Int32Value{Int32Value: 1002}},
|
||||||
|
"event_type": {Kind: &schema_pb.Value_StringValue{StringValue: "archived_logout"}},
|
||||||
|
"data": {Kind: &schema_pb.Value_StringValue{StringValue: `{"duration": 1800, "archived": true}`}},
|
||||||
|
},
|
||||||
|
Timestamp: now - 1800000000000, // 30 minutes ago
|
||||||
|
Key: []byte("archived-user-1002"),
|
||||||
|
Source: "parquet_archive",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
case "system_logs":
|
||||||
|
sampleData = []HybridScanResult{
|
||||||
|
// Simulated live system logs (recent)
|
||||||
|
{
|
||||||
|
Values: map[string]*schema_pb.Value{
|
||||||
|
"level": {Kind: &schema_pb.Value_StringValue{StringValue: "INFO"}},
|
||||||
|
"message": {Kind: &schema_pb.Value_StringValue{StringValue: "Live system startup completed"}},
|
||||||
|
"service": {Kind: &schema_pb.Value_StringValue{StringValue: "auth-service"}},
|
||||||
|
},
|
||||||
|
Timestamp: now - 240000000000, // 4 minutes ago
|
||||||
|
Key: []byte("live-sys-001"),
|
||||||
|
Source: "live_log",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Values: map[string]*schema_pb.Value{
|
||||||
|
"level": {Kind: &schema_pb.Value_StringValue{StringValue: "WARN"}},
|
||||||
|
"message": {Kind: &schema_pb.Value_StringValue{StringValue: "Live high memory usage detected"}},
|
||||||
|
"service": {Kind: &schema_pb.Value_StringValue{StringValue: "monitor-service"}},
|
||||||
|
},
|
||||||
|
Timestamp: now - 180000000000, // 3 minutes ago
|
||||||
|
Key: []byte("live-sys-002"),
|
||||||
|
Source: "live_log",
|
||||||
|
},
|
||||||
|
|
||||||
|
// Simulated archived system logs (older)
|
||||||
|
{
|
||||||
|
Values: map[string]*schema_pb.Value{
|
||||||
|
"level": {Kind: &schema_pb.Value_StringValue{StringValue: "ERROR"}},
|
||||||
|
"message": {Kind: &schema_pb.Value_StringValue{StringValue: "Archived database connection failed"}},
|
||||||
|
"service": {Kind: &schema_pb.Value_StringValue{StringValue: "db-service"}},
|
||||||
|
},
|
||||||
|
Timestamp: now - 7200000000000, // 2 hours ago
|
||||||
|
Key: []byte("archived-sys-001"),
|
||||||
|
Source: "parquet_archive",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Values: map[string]*schema_pb.Value{
|
||||||
|
"level": {Kind: &schema_pb.Value_StringValue{StringValue: "INFO"}},
|
||||||
|
"message": {Kind: &schema_pb.Value_StringValue{StringValue: "Archived batch job completed"}},
|
||||||
|
"service": {Kind: &schema_pb.Value_StringValue{StringValue: "batch-service"}},
|
||||||
|
},
|
||||||
|
Timestamp: now - 3600000000000, // 1 hour ago
|
||||||
|
Key: []byte("archived-sys-002"),
|
||||||
|
Source: "parquet_archive",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
// For unknown topics, return empty data
|
||||||
|
sampleData = []HybridScanResult{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply predicate filtering if specified
|
||||||
|
if options.Predicate != nil {
|
||||||
|
var filtered []HybridScanResult
|
||||||
|
for _, result := range sampleData {
|
||||||
|
// Convert to RecordValue for predicate testing
|
||||||
|
recordValue := &schema_pb.RecordValue{Fields: make(map[string]*schema_pb.Value)}
|
||||||
|
for k, v := range result.Values {
|
||||||
|
recordValue.Fields[k] = v
|
||||||
|
}
|
||||||
|
recordValue.Fields[SW_COLUMN_NAME_TIMESTAMP] = &schema_pb.Value{Kind: &schema_pb.Value_Int64Value{Int64Value: result.Timestamp}}
|
||||||
|
recordValue.Fields[SW_COLUMN_NAME_KEY] = &schema_pb.Value{Kind: &schema_pb.Value_BytesValue{BytesValue: result.Key}}
|
||||||
|
|
||||||
|
if options.Predicate(recordValue) {
|
||||||
|
filtered = append(filtered, result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sampleData = filtered
|
||||||
|
}
|
||||||
|
|
||||||
|
return sampleData
|
||||||
|
}
|
Reference in New Issue
Block a user