Files
seaweedfs/test/s3/parquet/Makefile
Chris Lu 64dcbbb25b test read write by s3fs and PyArrow native file system for s3 (#7520)
* test read write by s3fs and PyArrow native file system for s3

* address comments

* add github action
2025-11-21 11:18:32 -08:00

483 lines
23 KiB
Makefile

# Makefile for S3 Parquet Integration Tests
# Provides targets for running comprehensive S3 Parquet tests with PyArrow
# against a locally started SeaweedFS cluster.

# Default values. Every `?=` knob can be overridden from the environment or the
# command line, e.g. `make test S3_PORT=18333`.
SEAWEEDFS_BINARY ?= weed
S3_PORT ?= 8333
FILER_PORT ?= 8888
VOLUME_PORT ?= 8080
MASTER_PORT ?= 9333
# NOTE(review): TEST_TIMEOUT is not referenced by any rule visible in this file.
TEST_TIMEOUT ?= 15m
ACCESS_KEY ?= some_access_key1
SECRET_KEY ?= some_secret_key1
VOLUME_MAX_SIZE_MB ?= 50
VOLUME_MAX_COUNT ?= 100
BUCKET_NAME ?= test-parquet-bucket
ENABLE_SSE_S3 ?= false

# Python configuration
PYTHON ?= python3
VENV_DIR ?= .venv
PYTHON_TEST_SCRIPT ?= s3_parquet_test.py

# Test directory. $(CURDIR) is make's built-in absolute working directory, so no
# shell has to be forked to obtain it.
TEST_DIR := $(CURDIR)
SEAWEEDFS_ROOT := $(shell cd ../../../ && pwd)

# Colors for output (escape sequences embedded in the recipes' echo strings)
RED := \033[0;31m
GREEN := \033[0;32m
YELLOW := \033[1;33m
# No Color (reset). The comment sits on its own line on purpose: a comment placed
# after the value would leave the separating whitespace inside the variable.
NC := \033[0m
# All targets listed here are commands, not files, so they are declared phony.
.PHONY: all build-weed check-binary check-python ci-test clean \
        debug-logs debug-status help manual-start manual-stop setup-python \
        start-seaweedfs start-seaweedfs-ci stop-seaweedfs stop-seaweedfs-safe \
        test test-cross-fs test-cross-fs-with-server \
        test-implicit-dir test-implicit-dir-with-server \
        test-native-s3 test-native-s3-with-server test-native-s3-with-sse \
        test-quick test-sse-s3-compat test-with-server

# `all` simply delegates to the test suite.
all: test
# Build SeaweedFS binary (GitHub Actions compatible).
# Runs `go install` inside $(SEAWEEDFS_ROOT)/weed; the success message is only
# printed when the install step exits cleanly.
build-weed:
	@echo "Building SeaweedFS binary..."; \
	cd $(SEAWEEDFS_ROOT)/weed && go install -buildvcs=false && \
	echo "✅ SeaweedFS binary built successfully"
# Print the available targets and the effective configuration values.
# Lists every phony target defined in this file, including the ci-test alias and
# the manual-*/debug-* helpers, plus the tunable knobs users most often override.
help:
	@echo "SeaweedFS S3 Parquet Integration Tests"
	@echo ""
	@echo "Available targets:"
	@echo "  test                          - Run full S3 Parquet integration tests (small and large files)"
	@echo "  test-with-server              - Run full tests with automatic server management (CI compatible)"
	@echo "  test-quick                    - Run quick tests with small files only (sets TEST_QUICK=1)"
	@echo "  test-implicit-dir             - Test implicit directory fix for s3fs compatibility"
	@echo "  test-implicit-dir-with-server - Test implicit directory fix with server management"
	@echo "  test-native-s3                - Test PyArrow's native S3 filesystem (assumes server running)"
	@echo "  test-native-s3-with-server    - Test PyArrow's native S3 filesystem with server management"
	@echo "  test-native-s3-with-sse       - Test PyArrow's native S3 with SSE-S3 encryption enabled"
	@echo "  test-cross-fs                 - Test cross-filesystem compatibility (s3fs ↔ PyArrow native)"
	@echo "  test-cross-fs-with-server     - Test cross-filesystem compatibility with server management"
	@echo "  test-sse-s3-compat            - Comprehensive SSE-S3 compatibility test (multipart uploads)"
	@echo "  ci-test                       - Alias for test-with-server"
	@echo "  setup-python                  - Setup Python virtual environment and install dependencies"
	@echo "  check-python                  - Check if Python and required packages are available"
	@echo "  start-seaweedfs               - Start SeaweedFS server for testing"
	@echo "  start-seaweedfs-ci            - Start SeaweedFS server (CI-safe version)"
	@echo "  stop-seaweedfs                - Stop SeaweedFS server"
	@echo "  stop-seaweedfs-safe           - Stop SeaweedFS server (CI-safe version)"
	@echo "  manual-start                  - Start SeaweedFS server for manual testing"
	@echo "  manual-stop                   - Stop SeaweedFS server and clean up test artifacts"
	@echo "  debug-logs                    - Show the tail of the master, volume and filer logs"
	@echo "  debug-status                  - Show SeaweedFS processes and port usage"
	@echo "  clean                         - Clean up test artifacts"
	@echo "  check-binary                  - Check if SeaweedFS binary exists"
	@echo "  build-weed                    - Build SeaweedFS binary"
	@echo ""
	@echo "Configuration:"
	@echo "  SEAWEEDFS_BINARY=$(SEAWEEDFS_BINARY)"
	@echo "  S3_PORT=$(S3_PORT)"
	@echo "  FILER_PORT=$(FILER_PORT)"
	@echo "  VOLUME_PORT=$(VOLUME_PORT)"
	@echo "  MASTER_PORT=$(MASTER_PORT)"
	@echo "  BUCKET_NAME=$(BUCKET_NAME)"
	@echo "  VOLUME_MAX_SIZE_MB=$(VOLUME_MAX_SIZE_MB)"
	@echo "  VOLUME_MAX_COUNT=$(VOLUME_MAX_COUNT)"
	@echo "  ENABLE_SSE_S3=$(ENABLE_SSE_S3)"
	@echo "  PYTHON=$(PYTHON)"
	@echo "  VENV_DIR=$(VENV_DIR)"
# Verify that $(SEAWEEDFS_BINARY) resolves to an executable; abort with a hint
# on how to build it otherwise, and report its location on success.
check-binary:
	@command -v $(SEAWEEDFS_BINARY) > /dev/null 2>&1 || { \
		echo "$(RED)Error: SeaweedFS binary '$(SEAWEEDFS_BINARY)' not found in PATH$(NC)"; \
		echo "Please build SeaweedFS first by running 'make' in the root directory"; \
		exit 1; \
	}
	@echo "$(GREEN)SeaweedFS binary found: $$(which $(SEAWEEDFS_BINARY))$(NC)"
# Verify that the $(PYTHON) interpreter can be found; abort with an install hint
# otherwise, and report its location and version on success.
check-python:
	@command -v $(PYTHON) > /dev/null 2>&1 || { \
		echo "$(RED)Error: Python '$(PYTHON)' not found$(NC)"; \
		echo "Please install Python 3.8 or later"; \
		exit 1; \
	}
	@echo "$(GREEN)Python found: $$(which $(PYTHON)) ($$($(PYTHON) --version))$(NC)"
# Create the virtual environment in $(VENV_DIR) when that directory does not
# exist yet, then upgrade pip and install the packages from requirements.txt.
setup-python: check-python
	@echo "$(YELLOW)Setting up Python virtual environment...$(NC)"
	@[ -d "$(VENV_DIR)" ] || { \
		$(PYTHON) -m venv $(VENV_DIR); \
		echo "$(GREEN)Virtual environment created$(NC)"; \
	}
	@echo "$(YELLOW)Installing Python dependencies...$(NC)"
	@$(VENV_DIR)/bin/pip install --upgrade pip > /dev/null && \
	$(VENV_DIR)/bin/pip install -r requirements.txt && \
	echo "$(GREEN)Python dependencies installed successfully$(NC)"
# Start a single-node SeaweedFS cluster (master, volume server, filer with
# embedded S3) in the background and block until it is usable (CI-safe version).
#
# Sequence:
#   1. SIGTERM whatever listens on the HTTP ports and their gRPC counterparts
#      (HTTP port + 10000); skipped when lsof is not installed.
#   2. Create the data directories under /tmp and drop old server logs.
#   3. Launch master, volume server and filer, each with nohup and its own log.
#   4. Poll until the S3 port is listening (20 attempts, 1s apart).
#   5. Poll the master's /dir/assign endpoint until it returns a "fid"
#      (30 attempts, 1s apart).
# Steps 4 and 5 dump the relevant logs and fail the target on timeout.
#
# With ENABLE_SSE_S3=true the generated S3 config also enables SSE-S3 for
# $(BUCKET_NAME). All values are passed to printf as %s arguments, never as part
# of the format string, so characters such as '%' cannot corrupt the JSON.
start-seaweedfs-ci: check-binary
	@echo "$(YELLOW)Starting SeaweedFS server for Parquet testing...$(NC)"
	@# Clean up any existing processes first (CI-safe): at most 5 PIDs per port.
	@echo "Cleaning up any existing processes..."
	@if command -v lsof >/dev/null 2>&1; then \
		for port in $(MASTER_PORT) $(VOLUME_PORT) $(FILER_PORT) $(S3_PORT) \
			$$(( $(MASTER_PORT) + 10000 )) $$(( $(VOLUME_PORT) + 10000 )) $$(( $(FILER_PORT) + 10000 )); do \
			lsof -ti :$$port 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
		done; \
	fi
	@sleep 2
	@# Create necessary directories
	@mkdir -p /tmp/seaweedfs-test-parquet-master
	@mkdir -p /tmp/seaweedfs-test-parquet-volume
	@mkdir -p /tmp/seaweedfs-test-parquet-filer
	@# Clean up any old server logs
	@rm -f /tmp/seaweedfs-parquet-*.log || true
	@# Start master server with volume size limit and explicit gRPC port
	@echo "Starting master server..."
	@nohup $(SEAWEEDFS_BINARY) master -port=$(MASTER_PORT) -port.grpc=$$(( $(MASTER_PORT) + 10000 )) -mdir=/tmp/seaweedfs-test-parquet-master -volumeSizeLimitMB=$(VOLUME_MAX_SIZE_MB) -ip=127.0.0.1 -peers=none > /tmp/seaweedfs-parquet-master.log 2>&1 &
	@sleep 3
	@# Start volume server with master HTTP port and increased capacity
	@echo "Starting volume server..."
	@nohup $(SEAWEEDFS_BINARY) volume -port=$(VOLUME_PORT) -mserver=127.0.0.1:$(MASTER_PORT) -dir=/tmp/seaweedfs-test-parquet-volume -max=$(VOLUME_MAX_COUNT) -ip=127.0.0.1 -preStopSeconds=1 > /tmp/seaweedfs-parquet-volume.log 2>&1 &
	@sleep 5
	@# Start filer server with embedded S3
	@echo "Starting filer server with embedded S3..."
	@if [ "$(ENABLE_SSE_S3)" = "true" ]; then \
		echo " SSE-S3 encryption: ENABLED"; \
		printf '{"identities":[{"name":"%s","credentials":[{"accessKey":"%s","secretKey":"%s"}],"actions":["Admin","Read","Write"]}],"buckets":[{"name":"%s","encryption":{"sseS3":{"enabled":true}}}]}' "$(ACCESS_KEY)" "$(ACCESS_KEY)" "$(SECRET_KEY)" "$(BUCKET_NAME)" > /tmp/seaweedfs-parquet-s3.json; \
	else \
		echo " SSE-S3 encryption: DISABLED"; \
		printf '{"identities":[{"name":"%s","credentials":[{"accessKey":"%s","secretKey":"%s"}],"actions":["Admin","Read","Write"]}]}' "$(ACCESS_KEY)" "$(ACCESS_KEY)" "$(SECRET_KEY)" > /tmp/seaweedfs-parquet-s3.json; \
	fi
	@AWS_ACCESS_KEY_ID=$(ACCESS_KEY) AWS_SECRET_ACCESS_KEY=$(SECRET_KEY) nohup $(SEAWEEDFS_BINARY) filer -port=$(FILER_PORT) -port.grpc=$$(( $(FILER_PORT) + 10000 )) -master=127.0.0.1:$(MASTER_PORT) -dataCenter=defaultDataCenter -ip=127.0.0.1 -s3 -s3.port=$(S3_PORT) -s3.config=/tmp/seaweedfs-parquet-s3.json > /tmp/seaweedfs-parquet-filer.log 2>&1 &
	@sleep 5
	@# Wait for S3 service to be ready - use port-based checking for reliability.
	@# Any one of netstat, ss or lsof reporting the port is enough.
	@echo "$(YELLOW)Waiting for S3 service to be ready...$(NC)"
	@for i in $$(seq 1 20); do \
		if netstat -an 2>/dev/null | grep -q ":$(S3_PORT).*LISTEN" || \
		   ss -an 2>/dev/null | grep -q ":$(S3_PORT).*LISTEN" || \
		   lsof -i :$(S3_PORT) >/dev/null 2>&1; then \
			echo "$(GREEN)S3 service is listening on port $(S3_PORT)$(NC)"; \
			sleep 1; \
			break; \
		fi; \
		if [ $$i -eq 20 ]; then \
			echo "$(RED)S3 service failed to start within 20 seconds$(NC)"; \
			echo "=== Detailed Logs ==="; \
			echo "Master log:"; tail -30 /tmp/seaweedfs-parquet-master.log || true; \
			echo "Volume log:"; tail -30 /tmp/seaweedfs-parquet-volume.log || true; \
			echo "Filer log:"; tail -30 /tmp/seaweedfs-parquet-filer.log || true; \
			echo "=== Port Status ==="; \
			netstat -an 2>/dev/null | grep ":$(S3_PORT)" || \
			ss -an 2>/dev/null | grep ":$(S3_PORT)" || \
			echo "No port listening on $(S3_PORT)"; \
			exit 1; \
		fi; \
		echo "Waiting for S3 service... ($$i/20)"; \
		sleep 1; \
	done
	@# Additional wait for filer gRPC to be ready
	@echo "$(YELLOW)Waiting for filer gRPC to be ready...$(NC)"
	@sleep 2
	@# Wait for volume server to register with master and ensure volume assignment works
	@echo "$(YELLOW)Waiting for volume assignment to be ready...$(NC)"
	@for i in $$(seq 1 30); do \
		ASSIGN_RESULT=$$(curl -s "http://localhost:$(MASTER_PORT)/dir/assign?count=1" 2>/dev/null); \
		if echo "$$ASSIGN_RESULT" | grep -q '"fid"'; then \
			echo "$(GREEN)Volume assignment is ready$(NC)"; \
			break; \
		fi; \
		if [ $$i -eq 30 ]; then \
			echo "$(RED)Volume assignment not ready after 30 seconds$(NC)"; \
			echo "=== Last assign attempt ==="; \
			echo "$$ASSIGN_RESULT"; \
			echo "=== Master Status ==="; \
			curl -s "http://localhost:$(MASTER_PORT)/dir/status" 2>/dev/null || echo "Failed to get master status"; \
			echo "=== Master Logs ==="; \
			tail -50 /tmp/seaweedfs-parquet-master.log 2>/dev/null || echo "No master log"; \
			echo "=== Volume Logs ==="; \
			tail -50 /tmp/seaweedfs-parquet-volume.log 2>/dev/null || echo "No volume log"; \
			exit 1; \
		fi; \
		echo "Waiting for volume assignment... ($$i/30)"; \
		sleep 1; \
	done
	@echo "$(GREEN)SeaweedFS server started successfully for Parquet testing$(NC)"
	@echo "Master: http://localhost:$(MASTER_PORT)"
	@echo "Volume: http://localhost:$(VOLUME_PORT)"
	@echo "Filer: http://localhost:$(FILER_PORT)"
	@echo "S3: http://localhost:$(S3_PORT)"
	@echo "Volume Max Size: $(VOLUME_MAX_SIZE_MB)MB"
# Developer-facing start: SIGTERM every process bound to the HTTP ports and to
# their gRPC counterparts (HTTP port + 10000), then hand over to
# start-seaweedfs-ci for the actual startup.
start-seaweedfs: check-binary
	@echo "$(YELLOW)Starting SeaweedFS server for Parquet testing...$(NC)"
	@echo "Cleaning up any existing processes..."
	@# Port-based cleanup for consistency and safety; failures are ignored.
	@for port in $(MASTER_PORT) $(VOLUME_PORT) $(FILER_PORT) $(S3_PORT) \
		$$(( $(MASTER_PORT) + 10000 )) $$(( $(VOLUME_PORT) + 10000 )) $$(( $(FILER_PORT) + 10000 )); do \
		lsof -ti :$$port 2>/dev/null | xargs -r kill -TERM || true; \
	done
	@sleep 2
	@$(MAKE) start-seaweedfs-ci
# Stop the cluster by sending SIGTERM to every process bound to the HTTP ports
# and to their gRPC counterparts (HTTP port + 10000). Never fails: errors from
# lsof/kill are ignored.
stop-seaweedfs:
	@echo "$(YELLOW)Stopping SeaweedFS server...$(NC)"
	@# Port-based cleanup for consistency and safety.
	@for port in $(MASTER_PORT) $(VOLUME_PORT) $(FILER_PORT) $(S3_PORT) \
		$$(( $(MASTER_PORT) + 10000 )) $$(( $(VOLUME_PORT) + 10000 )) $$(( $(FILER_PORT) + 10000 )); do \
		lsof -ti :$$port 2>/dev/null | xargs -r kill -TERM || true; \
	done
	@sleep 2
	@echo "$(GREEN)SeaweedFS server stopped$(NC)"
# CI-safe server stop that's more conservative: at most 5 PIDs per port receive
# SIGTERM. Uses lsof when it is installed, otherwise parses `netstat -tlnp`
# (PID taken from column 7, "-" entries skipped). Covers the HTTP ports and
# their gRPC counterparts (HTTP port + 10000).
stop-seaweedfs-safe:
	@echo "$(YELLOW)Safely stopping SeaweedFS server...$(NC)"
	@# Use port-based cleanup which is safer in CI
	@if command -v lsof >/dev/null 2>&1; then \
		echo "Using lsof for port-based cleanup..."; \
		for port in $(MASTER_PORT) $(VOLUME_PORT) $(FILER_PORT) $(S3_PORT) \
			$$(( $(MASTER_PORT) + 10000 )) $$(( $(VOLUME_PORT) + 10000 )) $$(( $(FILER_PORT) + 10000 )); do \
			lsof -ti :$$port 2>/dev/null | head -5 | while read pid; do kill -TERM $$pid 2>/dev/null || true; done; \
		done; \
	else \
		echo "lsof not available, using netstat approach..."; \
		for port in $(MASTER_PORT) $(VOLUME_PORT) $(FILER_PORT) $(S3_PORT) \
			$$(( $(MASTER_PORT) + 10000 )) $$(( $(VOLUME_PORT) + 10000 )) $$(( $(FILER_PORT) + 10000 )); do \
			netstat -tlnp 2>/dev/null | grep :$$port | awk '{print $$7}' | cut -d/ -f1 | head -5 | while read pid; do [ "$$pid" != "-" ] && kill -TERM $$pid 2>/dev/null || true; done; \
		done; \
	fi
	@sleep 2
	@echo "$(GREEN)SeaweedFS server safely stopped$(NC)"
# Remove everything this Makefile creates: the /tmp data directories, server
# logs and generated S3 config, the Python error logs, the startup logs written
# by the *-with-server / SSE targets into the test directory, and the venv.
clean:
	@echo "$(YELLOW)Cleaning up Parquet test artifacts...$(NC)"
	@rm -rf /tmp/seaweedfs-test-parquet-*
	@rm -f /tmp/seaweedfs-parquet-*.log
	@rm -f /tmp/seaweedfs-parquet-s3.json
	@rm -f s3_parquet_test_errors_*.log
	@# Startup logs captured from `$(MAKE) start-seaweedfs-ci > weed-test*.log`
	@rm -f weed-test.log weed-test-sse.log weed-test-sse-compat.log
	@rm -rf $(VENV_DIR)
	@echo "$(GREEN)Parquet test cleanup completed$(NC)"
# Test with automatic server management (GitHub Actions compatible).
# Starts the cluster via start-seaweedfs-ci (output captured in weed-test.log),
# runs $(PYTHON_TEST_SCRIPT), and stops the cluster through an EXIT trap that is
# installed once startup has succeeded. On startup failure the captured log and
# a process listing are printed instead.
# The interpreter is invoked as $(VENV_DIR)/bin/python, which the venv always
# provides, so PYTHON may be overridden with an absolute path.
test-with-server: build-weed setup-python
	@echo "🚀 Starting Parquet integration tests with automated server management..."
	@echo "Starting SeaweedFS cluster..."
	@if $(MAKE) start-seaweedfs-ci > weed-test.log 2>&1; then \
		echo "✅ SeaweedFS cluster started successfully"; \
		echo "Running Parquet integration tests..."; \
		trap '$(MAKE) -C $(TEST_DIR) stop-seaweedfs-safe || true' EXIT; \
		S3_ENDPOINT_URL=http://localhost:$(S3_PORT) \
		S3_ACCESS_KEY=$(ACCESS_KEY) \
		S3_SECRET_KEY=$(SECRET_KEY) \
		BUCKET_NAME=$(BUCKET_NAME) \
		$(VENV_DIR)/bin/python $(PYTHON_TEST_SCRIPT) || exit 1; \
		echo "✅ All tests completed successfully"; \
	else \
		echo "❌ Failed to start SeaweedFS cluster"; \
		echo "=== Server startup logs ==="; \
		tail -100 weed-test.log 2>/dev/null || echo "No startup log available"; \
		echo "=== System information ==="; \
		ps aux | grep -E "weed|make" | grep -v grep || echo "No relevant processes found"; \
		exit 1; \
	fi
# Run tests assuming SeaweedFS is already running.
# Connection settings are handed to $(PYTHON_TEST_SCRIPT) through environment
# variables. The interpreter is invoked as $(VENV_DIR)/bin/python, which the
# venv always provides, so PYTHON may be overridden with an absolute path.
test: setup-python
	@echo "$(YELLOW)Running Parquet integration tests...$(NC)"
	@echo "$(YELLOW)Assuming SeaweedFS is already running on localhost:$(S3_PORT)$(NC)"
	@S3_ENDPOINT_URL=http://localhost:$(S3_PORT) \
	S3_ACCESS_KEY=$(ACCESS_KEY) \
	S3_SECRET_KEY=$(SECRET_KEY) \
	BUCKET_NAME=$(BUCKET_NAME) \
	$(VENV_DIR)/bin/python $(PYTHON_TEST_SCRIPT)
# Run quick tests with small files only (exports TEST_QUICK=1 to the script).
# Assumes SeaweedFS is already running. The interpreter is invoked as
# $(VENV_DIR)/bin/python, which the venv always provides, so PYTHON may be
# overridden with an absolute path.
test-quick: setup-python
	@echo "$(YELLOW)Running quick Parquet tests (small files only)...$(NC)"
	@echo "$(YELLOW)Assuming SeaweedFS is already running on localhost:$(S3_PORT)$(NC)"
	@S3_ENDPOINT_URL=http://localhost:$(S3_PORT) \
	S3_ACCESS_KEY=$(ACCESS_KEY) \
	S3_SECRET_KEY=$(SECRET_KEY) \
	BUCKET_NAME=$(BUCKET_NAME) \
	TEST_QUICK=1 \
	$(VENV_DIR)/bin/python $(PYTHON_TEST_SCRIPT)
# Test implicit directory fix for s3fs compatibility.
# Assumes SeaweedFS is already running and uses the fixed bucket name
# test-implicit-dir rather than $(BUCKET_NAME). The interpreter is invoked as
# $(VENV_DIR)/bin/python, which the venv always provides, so PYTHON may be
# overridden with an absolute path.
test-implicit-dir: setup-python
	@echo "$(YELLOW)Running implicit directory fix tests...$(NC)"
	@echo "$(YELLOW)Assuming SeaweedFS is already running on localhost:$(S3_PORT)$(NC)"
	@S3_ENDPOINT_URL=http://localhost:$(S3_PORT) \
	S3_ACCESS_KEY=$(ACCESS_KEY) \
	S3_SECRET_KEY=$(SECRET_KEY) \
	BUCKET_NAME=test-implicit-dir \
	$(VENV_DIR)/bin/python test_implicit_directory_fix.py
# Test implicit directory fix with automatic server management.
# Starts the cluster via start-seaweedfs-ci (output captured in weed-test.log),
# runs test_implicit_directory_fix.py against bucket test-implicit-dir, and
# stops the cluster through an EXIT trap installed after a successful start.
# The interpreter is invoked as $(VENV_DIR)/bin/python, which the venv always
# provides, so PYTHON may be overridden with an absolute path.
test-implicit-dir-with-server: build-weed setup-python
	@echo "🚀 Starting implicit directory fix tests with automated server management..."
	@echo "Starting SeaweedFS cluster..."
	@if $(MAKE) start-seaweedfs-ci > weed-test.log 2>&1; then \
		echo "✅ SeaweedFS cluster started successfully"; \
		echo "Running implicit directory fix tests..."; \
		trap '$(MAKE) -C $(TEST_DIR) stop-seaweedfs-safe || true' EXIT; \
		S3_ENDPOINT_URL=http://localhost:$(S3_PORT) \
		S3_ACCESS_KEY=$(ACCESS_KEY) \
		S3_SECRET_KEY=$(SECRET_KEY) \
		BUCKET_NAME=test-implicit-dir \
		$(VENV_DIR)/bin/python test_implicit_directory_fix.py || exit 1; \
		echo "✅ All tests completed successfully"; \
	else \
		echo "❌ Failed to start SeaweedFS cluster"; \
		echo "=== Server startup logs ==="; \
		tail -100 weed-test.log 2>/dev/null || echo "No startup log available"; \
		exit 1; \
	fi
# Debug targets
# debug-logs prints the last 50 lines of each server log, or a short notice when
# tail cannot read the file.
debug-logs:
	@for entry in Master:master Volume:volume Filer:filer; do \
		title=$${entry%%:*}; \
		name=$${entry##*:}; \
		echo "$(YELLOW)=== $$title Log ===$(NC)"; \
		tail -n 50 /tmp/seaweedfs-parquet-$$name.log || echo "No $$name log found"; \
	done
# Show processes whose command line mentions weed/seaweedfs, then the netstat
# lines that mention any of the four configured HTTP port numbers.
debug-status:
	@echo "$(YELLOW)=== Process Status ===$(NC)"; \
	ps aux | grep -E "(weed|seaweedfs)" | grep -v grep || echo "No SeaweedFS processes found"
	@echo "$(YELLOW)=== Port Status ===$(NC)"; \
	port_pattern="($(MASTER_PORT)|$(VOLUME_PORT)|$(FILER_PORT)|$(S3_PORT))"; \
	netstat -an | grep -E "$$port_pattern" || echo "No ports in use"
# Manual test targets for development.
# manual-start brings the cluster up through start-seaweedfs and prints usage
# hints; manual-stop runs stop-seaweedfs and clean as prerequisites.
manual-start: start-seaweedfs
	@echo "$(GREEN)SeaweedFS with S3 is now running for manual testing$(NC)"; \
	echo "You can now run Parquet tests manually"; \
	echo "Run 'make manual-stop' when finished"

manual-stop: stop-seaweedfs clean
# Test PyArrow's native S3 filesystem.
# Assumes SeaweedFS is already running; connection settings are handed to
# test_pyarrow_native_s3.py through environment variables. The interpreter is
# invoked as $(VENV_DIR)/bin/python, which the venv always provides, so PYTHON
# may be overridden with an absolute path.
test-native-s3: setup-python
	@echo "$(YELLOW)Running PyArrow native S3 filesystem tests...$(NC)"
	@echo "$(YELLOW)Assuming SeaweedFS is already running on localhost:$(S3_PORT)$(NC)"
	@S3_ENDPOINT_URL=http://localhost:$(S3_PORT) \
	S3_ACCESS_KEY=$(ACCESS_KEY) \
	S3_SECRET_KEY=$(SECRET_KEY) \
	BUCKET_NAME=$(BUCKET_NAME) \
	$(VENV_DIR)/bin/python test_pyarrow_native_s3.py
# Test PyArrow's native S3 filesystem with automatic server management.
# Starts the cluster via start-seaweedfs-ci (output captured in weed-test.log),
# runs test_pyarrow_native_s3.py, and stops the cluster through an EXIT trap
# installed after a successful start.
# The interpreter is invoked as $(VENV_DIR)/bin/python, which the venv always
# provides, so PYTHON may be overridden with an absolute path.
test-native-s3-with-server: build-weed setup-python
	@echo "🚀 Starting PyArrow native S3 filesystem tests with automated server management..."
	@echo "Starting SeaweedFS cluster..."
	@if $(MAKE) start-seaweedfs-ci > weed-test.log 2>&1; then \
		echo "✅ SeaweedFS cluster started successfully"; \
		echo "Running PyArrow native S3 filesystem tests..."; \
		trap '$(MAKE) -C $(TEST_DIR) stop-seaweedfs-safe || true' EXIT; \
		S3_ENDPOINT_URL=http://localhost:$(S3_PORT) \
		S3_ACCESS_KEY=$(ACCESS_KEY) \
		S3_SECRET_KEY=$(SECRET_KEY) \
		BUCKET_NAME=$(BUCKET_NAME) \
		$(VENV_DIR)/bin/python test_pyarrow_native_s3.py || exit 1; \
		echo "✅ All tests completed successfully"; \
	else \
		echo "❌ Failed to start SeaweedFS cluster"; \
		echo "=== Server startup logs ==="; \
		tail -100 weed-test.log 2>/dev/null || echo "No startup log available"; \
		exit 1; \
	fi
# Test cross-filesystem compatibility (s3fs ↔ PyArrow native S3).
# Assumes SeaweedFS is already running; connection settings are handed to
# test_cross_filesystem_compatibility.py through environment variables. The
# interpreter is invoked as $(VENV_DIR)/bin/python, which the venv always
# provides, so PYTHON may be overridden with an absolute path.
test-cross-fs: setup-python
	@echo "$(YELLOW)Running cross-filesystem compatibility tests...$(NC)"
	@echo "$(YELLOW)Assuming SeaweedFS is already running on localhost:$(S3_PORT)$(NC)"
	@S3_ENDPOINT_URL=http://localhost:$(S3_PORT) \
	S3_ACCESS_KEY=$(ACCESS_KEY) \
	S3_SECRET_KEY=$(SECRET_KEY) \
	BUCKET_NAME=$(BUCKET_NAME) \
	$(VENV_DIR)/bin/python test_cross_filesystem_compatibility.py
# Test cross-filesystem compatibility with automatic server management.
# Starts the cluster via start-seaweedfs-ci (output captured in weed-test.log),
# runs test_cross_filesystem_compatibility.py, and stops the cluster through an
# EXIT trap installed after a successful start.
# The interpreter is invoked as $(VENV_DIR)/bin/python, which the venv always
# provides, so PYTHON may be overridden with an absolute path.
test-cross-fs-with-server: build-weed setup-python
	@echo "🚀 Starting cross-filesystem compatibility tests with automated server management..."
	@echo "Starting SeaweedFS cluster..."
	@if $(MAKE) start-seaweedfs-ci > weed-test.log 2>&1; then \
		echo "✅ SeaweedFS cluster started successfully"; \
		echo "Running cross-filesystem compatibility tests..."; \
		trap '$(MAKE) -C $(TEST_DIR) stop-seaweedfs-safe || true' EXIT; \
		S3_ENDPOINT_URL=http://localhost:$(S3_PORT) \
		S3_ACCESS_KEY=$(ACCESS_KEY) \
		S3_SECRET_KEY=$(SECRET_KEY) \
		BUCKET_NAME=$(BUCKET_NAME) \
		$(VENV_DIR)/bin/python test_cross_filesystem_compatibility.py || exit 1; \
		echo "✅ All tests completed successfully"; \
	else \
		echo "❌ Failed to start SeaweedFS cluster"; \
		echo "=== Server startup logs ==="; \
		tail -100 weed-test.log 2>/dev/null || echo "No startup log available"; \
		exit 1; \
	fi
# Test PyArrow's native S3 filesystem compatibility with SSE-S3 enabled backend
# (for encryption-specific validation, use test-sse-s3-compat).
# Starts the cluster with ENABLE_SSE_S3=true (output captured in
# weed-test-sse.log), runs test_pyarrow_native_s3.py, and stops the cluster
# through an EXIT trap installed after a successful start.
# The interpreter is invoked as $(VENV_DIR)/bin/python, which the venv always
# provides, so PYTHON may be overridden with an absolute path.
test-native-s3-with-sse: build-weed setup-python
	@echo "🚀 Testing PyArrow native S3 compatibility with SSE-S3 enabled backend..."
	@echo "Starting SeaweedFS cluster with SSE-S3 enabled..."
	@if $(MAKE) start-seaweedfs-ci ENABLE_SSE_S3=true > weed-test-sse.log 2>&1; then \
		echo "✅ SeaweedFS cluster started successfully with SSE-S3"; \
		echo "Running PyArrow native S3 filesystem tests with SSE-S3..."; \
		trap '$(MAKE) -C $(TEST_DIR) stop-seaweedfs-safe || true' EXIT; \
		S3_ENDPOINT_URL=http://localhost:$(S3_PORT) \
		S3_ACCESS_KEY=$(ACCESS_KEY) \
		S3_SECRET_KEY=$(SECRET_KEY) \
		BUCKET_NAME=$(BUCKET_NAME) \
		$(VENV_DIR)/bin/python test_pyarrow_native_s3.py || exit 1; \
		echo "✅ All SSE-S3 tests completed successfully"; \
	else \
		echo "❌ Failed to start SeaweedFS cluster with SSE-S3"; \
		echo "=== Server startup logs ==="; \
		tail -100 weed-test-sse.log 2>/dev/null || echo "No startup log available"; \
		exit 1; \
	fi
# Comprehensive SSE-S3 compatibility test.
# Starts the cluster with ENABLE_SSE_S3=true (output captured in
# weed-test-sse-compat.log), runs test_sse_s3_compatibility.py, and stops the
# cluster through an EXIT trap installed after a successful start.
# The interpreter is invoked as $(VENV_DIR)/bin/python, which the venv always
# provides, so PYTHON may be overridden with an absolute path.
test-sse-s3-compat: build-weed setup-python
	@echo "🚀 Starting comprehensive SSE-S3 compatibility tests..."
	@echo "Starting SeaweedFS cluster with SSE-S3 enabled..."
	@if $(MAKE) start-seaweedfs-ci ENABLE_SSE_S3=true > weed-test-sse-compat.log 2>&1; then \
		echo "✅ SeaweedFS cluster started successfully with SSE-S3"; \
		echo "Running comprehensive SSE-S3 compatibility tests..."; \
		trap '$(MAKE) -C $(TEST_DIR) stop-seaweedfs-safe || true' EXIT; \
		S3_ENDPOINT_URL=http://localhost:$(S3_PORT) \
		S3_ACCESS_KEY=$(ACCESS_KEY) \
		S3_SECRET_KEY=$(SECRET_KEY) \
		BUCKET_NAME=$(BUCKET_NAME) \
		$(VENV_DIR)/bin/python test_sse_s3_compatibility.py || exit 1; \
		echo "✅ All SSE-S3 compatibility tests completed successfully"; \
	else \
		echo "❌ Failed to start SeaweedFS cluster with SSE-S3"; \
		echo "=== Server startup logs ==="; \
		tail -100 weed-test-sse-compat.log 2>/dev/null || echo "No startup log available"; \
		exit 1; \
	fi
# CI/CD targets
# ci-test is an alias: it has no recipe of its own and only pulls in
# test-with-server (build, start cluster, run tests, stop cluster).
ci-test: test-with-server