feat: Add comprehensive string functions with extensive tests

Implemented String Functions:
- LENGTH: Get string length (supports all value types)
- UPPER/LOWER: Case conversion
- TRIM/LTRIM/RTRIM: Whitespace removal (space, tab, newline, carriage return)
- SUBSTRING: Extract substring with optional length (SQL 1-based indexing)
- CONCAT: Concatenate multiple values (supports mixed types, skips nulls)
- REPLACE: Replace all occurrences of substring
- POSITION: Find substring position (1-based, 0 if not found)
- LEFT/RIGHT: Extract leftmost/rightmost characters
- REVERSE: Reverse string with proper Unicode support

Key Features:
- Robust type conversion (string, int, float, bool, bytes)
- Unicode-safe REVERSE (rune-based); LENGTH, SUBSTRING, POSITION, LEFT and RIGHT count bytes, not characters
- SQL-compatible indexing (1-based for SUBSTRING, POSITION)
- Comprehensive error handling with descriptive messages
- Mixed-type support (e.g., CONCAT number with string)

Helper Functions:
- valueToString: Convert any schema_pb.Value to string
- valueToInt64: Convert numeric values to int64

Comprehensive test suite with 25+ test cases covering:
- All string functions with typical use cases
- Type conversion scenarios (numbers, booleans)
- Edge cases (empty strings, null values, Unicode)
- Error conditions and boundary testing

All tests passing 
This commit is contained in:
chrislu
2025-09-04 00:21:17 -07:00
parent 25b07fda6c
commit 179a7b446e
2 changed files with 635 additions and 0 deletions

View File

@@ -477,3 +477,374 @@ func (e *SQLEngine) DateTrunc(precision string, value *schema_pb.Value) (*schema
},
}, nil
}
// ===============================
// STRING FUNCTIONS
// ===============================
// Length implements SQL LENGTH.
//
// The argument is rendered as text with valueToString and the size of that
// text in bytes (Go's len) is returned as an Int64 value. A nil argument, or a
// value kind that valueToString cannot render, produces an error.
func (e *SQLEngine) Length(value *schema_pb.Value) (*schema_pb.Value, error) {
	if value == nil {
		return nil, fmt.Errorf("LENGTH function requires non-null value")
	}

	var (
		text    string
		convErr error
	)
	if text, convErr = e.valueToString(value); convErr != nil {
		return nil, fmt.Errorf("LENGTH function conversion error: %v", convErr)
	}

	out := &schema_pb.Value{
		Kind: &schema_pb.Value_Int64Value{Int64Value: int64(len(text))},
	}
	return out, nil
}
// Upper implements SQL UPPER.
//
// The argument is rendered as text with valueToString and mapped to upper case
// with strings.ToUpper; the result is always a String value. A nil argument or
// an unconvertible value kind produces an error.
func (e *SQLEngine) Upper(value *schema_pb.Value) (*schema_pb.Value, error) {
	if value == nil {
		return nil, fmt.Errorf("UPPER function requires non-null value")
	}

	text, convErr := e.valueToString(value)
	if convErr != nil {
		return nil, fmt.Errorf("UPPER function conversion error: %v", convErr)
	}

	upper := strings.ToUpper(text)
	out := &schema_pb.Value{
		Kind: &schema_pb.Value_StringValue{StringValue: upper},
	}
	return out, nil
}
// Lower implements SQL LOWER.
//
// The argument is rendered as text with valueToString and mapped to lower case
// with strings.ToLower; the result is always a String value. A nil argument or
// an unconvertible value kind produces an error.
func (e *SQLEngine) Lower(value *schema_pb.Value) (*schema_pb.Value, error) {
	if value == nil {
		return nil, fmt.Errorf("LOWER function requires non-null value")
	}

	text, convErr := e.valueToString(value)
	if convErr != nil {
		return nil, fmt.Errorf("LOWER function conversion error: %v", convErr)
	}

	lower := strings.ToLower(text)
	out := &schema_pb.Value{
		Kind: &schema_pb.Value_StringValue{StringValue: lower},
	}
	return out, nil
}
// Trim implements SQL TRIM: it strips leading and trailing whitespace from the
// textual form of value.
//
// "Whitespace" is exactly space, tab, newline and carriage return - the same
// set LTrim and RTrim strip - so TRIM(x) always equals LTRIM(RTRIM(x)). Other
// Unicode space characters (for example vertical tab, form feed or a
// non-breaking space) are left in place.
//
// The result is always a String value. A nil argument or an unconvertible
// value kind produces an error.
func (e *SQLEngine) Trim(value *schema_pb.Value) (*schema_pb.Value, error) {
	if value == nil {
		return nil, fmt.Errorf("TRIM function requires non-null value")
	}

	str, err := e.valueToString(value)
	if err != nil {
		return nil, fmt.Errorf("TRIM function conversion error: %v", err)
	}

	// Use the same cutset as LTrim/RTrim rather than strings.TrimSpace, which
	// would remove a wider set of characters than the one-sided variants.
	return &schema_pb.Value{
		Kind: &schema_pb.Value_StringValue{StringValue: strings.Trim(str, " \t\n\r")},
	}, nil
}
// LTrim implements SQL LTRIM.
//
// The argument is rendered as text with valueToString, then every leading
// space, tab, newline and carriage return is removed. The result is always a
// String value. A nil argument or an unconvertible value kind produces an
// error.
func (e *SQLEngine) LTrim(value *schema_pb.Value) (*schema_pb.Value, error) {
	// Characters stripped from the left edge.
	const cutset = " \t\n\r"

	if value == nil {
		return nil, fmt.Errorf("LTRIM function requires non-null value")
	}

	text, convErr := e.valueToString(value)
	if convErr != nil {
		return nil, fmt.Errorf("LTRIM function conversion error: %v", convErr)
	}

	trimmed := strings.TrimLeft(text, cutset)
	out := &schema_pb.Value{
		Kind: &schema_pb.Value_StringValue{StringValue: trimmed},
	}
	return out, nil
}
// RTrim implements SQL RTRIM.
//
// The argument is rendered as text with valueToString, then every trailing
// space, tab, newline and carriage return is removed. The result is always a
// String value. A nil argument or an unconvertible value kind produces an
// error.
func (e *SQLEngine) RTrim(value *schema_pb.Value) (*schema_pb.Value, error) {
	// Characters stripped from the right edge.
	const cutset = " \t\n\r"

	if value == nil {
		return nil, fmt.Errorf("RTRIM function requires non-null value")
	}

	text, convErr := e.valueToString(value)
	if convErr != nil {
		return nil, fmt.Errorf("RTRIM function conversion error: %v", convErr)
	}

	trimmed := strings.TrimRight(text, cutset)
	out := &schema_pb.Value{
		Kind: &schema_pb.Value_StringValue{StringValue: trimmed},
	}
	return out, nil
}
// Substring implements SQL SUBSTRING(value, start [, length]).
//
// Parameters:
//   - value:  the source; rendered as text with valueToString.
//   - start:  1-based start position, converted with valueToInt64. Positions
//     below 1 are treated as 1.
//   - length: optional; only the first element is consulted, and a nil first
//     element means "to the end of the string". A length <= 0 yields "".
//
// Returns a String value holding the selected slice; a start beyond the end of
// the text yields "". An error is returned when value or start is nil, or when
// any argument cannot be converted.
//
// NOTE(review): positions and lengths are counted in bytes of the Go string,
// so a multi-byte UTF-8 character can be cut in half. Moving to character
// offsets should be done together with Length, Position, Left and Right so the
// functions keep agreeing with each other.
func (e *SQLEngine) Substring(value *schema_pb.Value, start *schema_pb.Value, length ...*schema_pb.Value) (*schema_pb.Value, error) {
	if value == nil || start == nil {
		return nil, fmt.Errorf("SUBSTRING function requires non-null value and start position")
	}

	str, err := e.valueToString(value)
	if err != nil {
		return nil, fmt.Errorf("SUBSTRING function value conversion error: %v", err)
	}

	startPos, err := e.valueToInt64(start)
	if err != nil {
		return nil, fmt.Errorf("SUBSTRING function start position conversion error: %v", err)
	}

	// Convert to 0-based indexing (SQL uses 1-based).
	if startPos < 1 {
		startPos = 1
	}

	// All bounds arithmetic stays in int64 and is compared against the string
	// length before anything is narrowed to int. This avoids truncating a huge
	// start on 32-bit platforms and avoids start+length wrapping negative
	// (which would make the slice expression below panic).
	strLen := int64(len(str))
	startIdx := startPos - 1
	if startIdx >= strLen {
		return &schema_pb.Value{
			Kind: &schema_pb.Value_StringValue{StringValue: ""},
		}, nil
	}

	endIdx := strLen // default: run to the end of the string
	if len(length) > 0 && length[0] != nil {
		lengthVal, err := e.valueToInt64(length[0])
		if err != nil {
			return nil, fmt.Errorf("SUBSTRING function length conversion error: %v", err)
		}

		if lengthVal <= 0 {
			return &schema_pb.Value{
				Kind: &schema_pb.Value_StringValue{StringValue: ""},
			}, nil
		}

		// Only add when the sum is known to stay inside the string; a larger
		// length simply means "everything that is left".
		if remaining := strLen - startIdx; lengthVal < remaining {
			endIdx = startIdx + lengthVal
		}
	}

	// Both indices are now within [0, len(str)], so narrowing is safe.
	return &schema_pb.Value{
		Kind: &schema_pb.Value_StringValue{StringValue: str[int(startIdx):int(endIdx)]},
	}, nil
}
// Concat implements SQL CONCAT.
//
// Each non-nil argument is rendered as text with valueToString and appended in
// order; nil arguments are skipped. With no arguments (or only nil ones) the
// result is the empty string. The result is always a String value. If any
// argument cannot be converted, an error naming its zero-based index is
// returned.
func (e *SQLEngine) Concat(values ...*schema_pb.Value) (*schema_pb.Value, error) {
	var joined strings.Builder

	for idx := 0; idx < len(values); idx++ {
		item := values[idx]
		if item == nil {
			// Null arguments contribute nothing to the output.
			continue
		}

		piece, convErr := e.valueToString(item)
		if convErr != nil {
			return nil, fmt.Errorf("CONCAT function value %d conversion error: %v", idx, convErr)
		}
		joined.WriteString(piece)
	}

	out := &schema_pb.Value{
		Kind: &schema_pb.Value_StringValue{StringValue: joined.String()},
	}
	return out, nil
}
// Replace implements SQL REPLACE(value, oldStr, newStr).
//
// All three arguments are rendered as text with valueToString, then every
// non-overlapping occurrence of oldStr in value is substituted with newStr.
// The result is always a String value. An error is returned when any argument
// is nil or cannot be converted.
func (e *SQLEngine) Replace(value, oldStr, newStr *schema_pb.Value) (*schema_pb.Value, error) {
	if value == nil || oldStr == nil || newStr == nil {
		return nil, fmt.Errorf("REPLACE function requires non-null values")
	}

	subject, convErr := e.valueToString(value)
	if convErr != nil {
		return nil, fmt.Errorf("REPLACE function value conversion error: %v", convErr)
	}

	search, convErr := e.valueToString(oldStr)
	if convErr != nil {
		return nil, fmt.Errorf("REPLACE function old string conversion error: %v", convErr)
	}

	replacement, convErr := e.valueToString(newStr)
	if convErr != nil {
		return nil, fmt.Errorf("REPLACE function new string conversion error: %v", convErr)
	}

	// A negative count means "no limit on the number of replacements".
	replaced := strings.Replace(subject, search, replacement, -1)
	out := &schema_pb.Value{
		Kind: &schema_pb.Value_StringValue{StringValue: replaced},
	}
	return out, nil
}
// Position implements SQL POSITION(substring IN value).
//
// Both arguments are rendered as text with valueToString. The result is an
// Int64 value holding the 1-based byte offset of the first occurrence of
// substring inside value, or 0 when it does not occur. An error is returned
// when either argument is nil or cannot be converted.
func (e *SQLEngine) Position(substring, value *schema_pb.Value) (*schema_pb.Value, error) {
	if substring == nil || value == nil {
		return nil, fmt.Errorf("POSITION function requires non-null values")
	}

	haystack, convErr := e.valueToString(value)
	if convErr != nil {
		return nil, fmt.Errorf("POSITION function string conversion error: %v", convErr)
	}

	needle, convErr := e.valueToString(substring)
	if convErr != nil {
		return nil, fmt.Errorf("POSITION function substring conversion error: %v", convErr)
	}

	// strings.Index reports -1 for "absent" and a 0-based offset otherwise, so
	// adding one gives both the SQL "not found" value (0) and the 1-based
	// position in a single step.
	oneBased := strings.Index(haystack, needle) + 1

	out := &schema_pb.Value{
		Kind: &schema_pb.Value_Int64Value{Int64Value: int64(oneBased)},
	}
	return out, nil
}
// Left implements SQL LEFT(value, length): the leading part of the textual
// form of value.
//
// Parameters:
//   - value:  the source; rendered as text with valueToString.
//   - length: how much to keep, converted with valueToInt64. A length <= 0
//     yields ""; a length at or beyond the size of the text yields the whole
//     text.
//
// The result is always a String value. An error is returned when either
// argument is nil or cannot be converted.
//
// NOTE(review): length is counted in bytes of the Go string, so a multi-byte
// UTF-8 character can be cut in half.
func (e *SQLEngine) Left(value *schema_pb.Value, length *schema_pb.Value) (*schema_pb.Value, error) {
	if value == nil || length == nil {
		return nil, fmt.Errorf("LEFT function requires non-null values")
	}

	str, err := e.valueToString(value)
	if err != nil {
		return nil, fmt.Errorf("LEFT function string conversion error: %v", err)
	}

	lengthVal, err := e.valueToInt64(length)
	if err != nil {
		return nil, fmt.Errorf("LEFT function length conversion error: %v", err)
	}

	if lengthVal <= 0 {
		return &schema_pb.Value{
			Kind: &schema_pb.Value_StringValue{StringValue: ""},
		}, nil
	}

	// Compare in int64 so a very large length is not truncated on platforms
	// where int is 32 bits wide.
	if lengthVal >= int64(len(str)) {
		return &schema_pb.Value{
			Kind: &schema_pb.Value_StringValue{StringValue: str},
		}, nil
	}

	// 0 < lengthVal < len(str) here, so narrowing to int is safe.
	return &schema_pb.Value{
		Kind: &schema_pb.Value_StringValue{StringValue: str[:int(lengthVal)]},
	}, nil
}
// Right implements SQL RIGHT(value, length): the trailing part of the textual
// form of value.
//
// Parameters:
//   - value:  the source; rendered as text with valueToString.
//   - length: how much to keep, converted with valueToInt64. A length <= 0
//     yields ""; a length at or beyond the size of the text yields the whole
//     text.
//
// The result is always a String value. An error is returned when either
// argument is nil or cannot be converted.
//
// NOTE(review): length is counted in bytes of the Go string, so a multi-byte
// UTF-8 character can be cut in half.
func (e *SQLEngine) Right(value *schema_pb.Value, length *schema_pb.Value) (*schema_pb.Value, error) {
	if value == nil || length == nil {
		return nil, fmt.Errorf("RIGHT function requires non-null values")
	}

	str, err := e.valueToString(value)
	if err != nil {
		return nil, fmt.Errorf("RIGHT function string conversion error: %v", err)
	}

	lengthVal, err := e.valueToInt64(length)
	if err != nil {
		return nil, fmt.Errorf("RIGHT function length conversion error: %v", err)
	}

	if lengthVal <= 0 {
		return &schema_pb.Value{
			Kind: &schema_pb.Value_StringValue{StringValue: ""},
		}, nil
	}

	// Compare in int64 so a very large length is not truncated on platforms
	// where int is 32 bits wide (which would silently return the wrong
	// suffix).
	if lengthVal >= int64(len(str)) {
		return &schema_pb.Value{
			Kind: &schema_pb.Value_StringValue{StringValue: str},
		}, nil
	}

	// 0 < lengthVal < len(str) here, so narrowing to int is safe.
	startPos := len(str) - int(lengthVal)
	return &schema_pb.Value{
		Kind: &schema_pb.Value_StringValue{StringValue: str[startPos:]},
	}, nil
}
// Reverse implements SQL REVERSE.
//
// The argument is rendered as text with valueToString, decoded into runes, and
// re-emitted in the opposite order, so multi-byte characters stay intact. The
// result is always a String value. A nil argument or an unconvertible value
// kind produces an error.
func (e *SQLEngine) Reverse(value *schema_pb.Value) (*schema_pb.Value, error) {
	if value == nil {
		return nil, fmt.Errorf("REVERSE function requires non-null value")
	}

	text, convErr := e.valueToString(value)
	if convErr != nil {
		return nil, fmt.Errorf("REVERSE function conversion error: %v", convErr)
	}

	// Work on code points rather than bytes, filling a second slice from the
	// back so the first input rune lands in the last output slot.
	forward := []rune(text)
	backward := make([]rune, len(forward))
	last := len(forward) - 1
	for idx, r := range forward {
		backward[last-idx] = r
	}

	out := &schema_pb.Value{
		Kind: &schema_pb.Value_StringValue{StringValue: string(backward)},
	}
	return out, nil
}
// valueToString renders a schema_pb.Value as text for the string functions.
//
// Conversions by kind:
//   - string: returned unchanged
//   - int32 / int64: base-10 digits
//   - float / double: shortest 'g' representation that round-trips at the
//     value's own precision (32 or 64 bits)
//   - bool: "true" or "false"
//   - bytes: the raw bytes reinterpreted as a string (no validation)
//
// An error is returned for a nil value and for any other (or unset) kind.
func (e *SQLEngine) valueToString(value *schema_pb.Value) (string, error) {
	// Guard here so a nil argument becomes an error instead of a nil-pointer
	// panic on value.Kind below.
	if value == nil {
		return "", fmt.Errorf("cannot convert null value to string")
	}

	switch v := value.Kind.(type) {
	case *schema_pb.Value_StringValue:
		return v.StringValue, nil
	case *schema_pb.Value_Int32Value:
		return strconv.FormatInt(int64(v.Int32Value), 10), nil
	case *schema_pb.Value_Int64Value:
		return strconv.FormatInt(v.Int64Value, 10), nil
	case *schema_pb.Value_FloatValue:
		return strconv.FormatFloat(float64(v.FloatValue), 'g', -1, 32), nil
	case *schema_pb.Value_DoubleValue:
		return strconv.FormatFloat(v.DoubleValue, 'g', -1, 64), nil
	case *schema_pb.Value_BoolValue:
		return strconv.FormatBool(v.BoolValue), nil
	case *schema_pb.Value_BytesValue:
		return string(v.BytesValue), nil
	default:
		return "", fmt.Errorf("cannot convert value type to string")
	}
}
// valueToInt64 converts a schema_pb.Value to an int64 for use as a position or
// length argument.
//
// Conversions by kind:
//   - int32 / int64: widened or returned as is
//   - float / double: truncated toward zero; NaN, infinities and values
//     outside the int64 range are rejected
//   - string: parsed as a base-10 integer
//
// An error is returned for a nil value, for an unparsable string, for a
// non-representable floating-point value, and for any other (or unset) kind.
func (e *SQLEngine) valueToInt64(value *schema_pb.Value) (int64, error) {
	// Guard here so a nil argument becomes an error instead of a nil-pointer
	// panic on value.Kind below.
	if value == nil {
		return 0, fmt.Errorf("cannot convert null value to integer")
	}

	// Both bounds are powers of two and therefore exact as float64.
	const (
		lowestInt64    = -9223372036854775808.0 // -2^63, still representable
		firstTooLarge  = 9223372036854775808.0  // 2^63, first value that is not
	)

	// fromFloat truncates f toward zero. Go leaves the result of converting an
	// out-of-range or non-finite float to an integer implementation-defined,
	// so such inputs are refused. The condition is written in negated form on
	// purpose: every comparison with NaN is false, so NaN is rejected too.
	fromFloat := func(f float64) (int64, error) {
		if !(f >= lowestInt64 && f < firstTooLarge) {
			return 0, fmt.Errorf("cannot convert floating-point value %v to integer", f)
		}
		return int64(f), nil
	}

	switch v := value.Kind.(type) {
	case *schema_pb.Value_Int32Value:
		return int64(v.Int32Value), nil
	case *schema_pb.Value_Int64Value:
		return v.Int64Value, nil
	case *schema_pb.Value_FloatValue:
		return fromFloat(float64(v.FloatValue))
	case *schema_pb.Value_DoubleValue:
		return fromFloat(v.DoubleValue)
	case *schema_pb.Value_StringValue:
		if i, err := strconv.ParseInt(v.StringValue, 10, 64); err == nil {
			return i, nil
		}
		return 0, fmt.Errorf("cannot convert string '%s' to integer", v.StringValue)
	default:
		return 0, fmt.Errorf("cannot convert value type to integer")
	}
}