diff --git a/.github/workflows/deploy_telemetry.yml b/.github/workflows/deploy_telemetry.yml new file mode 100644 index 000000000..8f10af0ce --- /dev/null +++ b/.github/workflows/deploy_telemetry.yml @@ -0,0 +1,157 @@ +# This workflow will build and deploy the SeaweedFS telemetry server +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go + +name: Deploy Telemetry Server + +on: + push: + branches: [ "master" ] + paths: + - 'telemetry/**' + workflow_dispatch: + inputs: + setup: + description: 'Run first-time server setup' + required: true + type: boolean + default: false + deploy: + description: 'Deploy telemetry server to remote server' + required: true + type: boolean + default: false + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '1.24' + + - name: Build Telemetry Server + run: | + go mod tidy + cd telemetry/server + GOOS=linux GOARCH=amd64 go build -o telemetry-server main.go + + - name: First-time Server Setup + if: github.event_name == 'workflow_dispatch' && inputs.setup + env: + SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }} + REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }} + REMOTE_USER: ${{ secrets.TELEMETRY_USER }} + run: | + mkdir -p ~/.ssh + echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key + chmod 600 ~/.ssh/deploy_key + echo "Host *" > ~/.ssh/config + echo " StrictHostKeyChecking no" >> ~/.ssh/config + + # Create all required directories with proper permissions + ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST " + mkdir -p ~/seaweedfs-telemetry/bin ~/seaweedfs-telemetry/logs ~/seaweedfs-telemetry/data ~/seaweedfs-telemetry/tmp && \ + chmod 755 ~/seaweedfs-telemetry/logs && \ + chmod 755 ~/seaweedfs-telemetry/data && \ + touch ~/seaweedfs-telemetry/logs/telemetry.log ~/seaweedfs-telemetry/logs/telemetry.error.log && \ + chmod 644 ~/seaweedfs-telemetry/logs/*.log" + + # Create systemd service file + echo " + [Unit] + Description=SeaweedFS Telemetry Server + After=network.target + + [Service] + Type=simple + User=$REMOTE_USER + WorkingDirectory=/home/$REMOTE_USER/seaweedfs-telemetry + ExecStart=/home/$REMOTE_USER/seaweedfs-telemetry/bin/telemetry-server -port=8353 + Restart=always + RestartSec=5 + StandardOutput=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.log + StandardError=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.error.log + + [Install] + WantedBy=multi-user.target" > telemetry.service + + # Setup logrotate configuration + echo "# SeaweedFS Telemetry service log rotation + /home/$REMOTE_USER/seaweedfs-telemetry/logs/*.log { + daily + rotate 30 + compress + delaycompress + missingok + notifempty + create 644 $REMOTE_USER $REMOTE_USER + postrotate + systemctl restart telemetry.service + endscript + }" > telemetry_logrotate + + # Copy Grafana dashboard and Prometheus config + scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/ + scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/ + + # Copy and install service and logrotate files + scp -i ~/.ssh/deploy_key telemetry.service telemetry_logrotate $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/ + ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST " + sudo mv ~/seaweedfs-telemetry/telemetry.service /etc/systemd/system/ && \ + sudo mv ~/seaweedfs-telemetry/telemetry_logrotate /etc/logrotate.d/seaweedfs-telemetry && \ + sudo systemctl daemon-reload && \ + sudo systemctl enable telemetry.service" + + rm -f ~/.ssh/deploy_key + + - name: Deploy Telemetry Server to Remote Server + if: (github.event_name == 'push' && contains(github.ref, 'refs/heads/master')) || (github.event_name == 'workflow_dispatch' && inputs.deploy) + env: + SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }} + REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }} + REMOTE_USER: ${{ secrets.TELEMETRY_USER }} + run: | + mkdir -p ~/.ssh + echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key + chmod 600 ~/.ssh/deploy_key + echo "Host *" > ~/.ssh/config + echo " StrictHostKeyChecking no" >> ~/.ssh/config + + # Create temp directory and copy binary + ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "mkdir -p ~/seaweedfs-telemetry/tmp" + scp -i ~/.ssh/deploy_key telemetry/server/telemetry-server $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/tmp/ + + # Copy updated configuration files + scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/ + scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/ + + # Stop service, move binary, and restart + ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST " + sudo systemctl stop telemetry.service || true && \ + mkdir -p ~/seaweedfs-telemetry/bin && \ + mv ~/seaweedfs-telemetry/tmp/telemetry-server ~/seaweedfs-telemetry/bin/ && \ + chmod +x ~/seaweedfs-telemetry/bin/telemetry-server && \ + sudo systemctl start telemetry.service && \ + sudo systemctl status telemetry.service" + + # Verify deployment + ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST " + echo 'Waiting for service to start...' + sleep 5 + curl -f http://localhost:8353/health || echo 'Health check failed'" + + rm -f ~/.ssh/deploy_key + + - name: Notify Deployment Status + if: always() + run: | + if [ "${{ job.status }}" == "success" ]; then + echo "✅ Telemetry server deployment successful" + echo "Dashboard: http://${{ secrets.TELEMETRY_HOST }}:8353" + echo "Metrics: http://${{ secrets.TELEMETRY_HOST }}:8353/metrics" + else + echo "❌ Telemetry server deployment failed" + fi \ No newline at end of file diff --git a/telemetry/DEPLOYMENT.md b/telemetry/DEPLOYMENT.md new file mode 100644 index 000000000..d5cd69154 --- /dev/null +++ b/telemetry/DEPLOYMENT.md @@ -0,0 +1,271 @@ +# SeaweedFS Telemetry Server Deployment + +This document describes how to deploy the SeaweedFS telemetry server to a remote server using GitHub Actions. + +## Prerequisites + +1. A remote Linux server with: + - SSH access + - systemd (for service management) + - Optional: Prometheus and Grafana (for monitoring) + +2. GitHub repository secrets configured (see [Setup GitHub Secrets](#setup-github-secrets) below): + - `TELEMETRY_SSH_PRIVATE_KEY`: SSH private key for accessing the remote server + - `TELEMETRY_HOST`: Remote server hostname or IP address + - `TELEMETRY_USER`: Username for SSH access + +## Setup GitHub Secrets + +Before using the deployment workflow, you need to configure the required secrets in your GitHub repository. + +### Step 1: Generate SSH Key Pair + +On your local machine, generate a new SSH key pair specifically for deployment: + +```bash +# Generate a new SSH key pair +ssh-keygen -t ed25519 -C "seaweedfs-telemetry-deploy" -f ~/.ssh/seaweedfs_telemetry_deploy + +# This creates two files: +# ~/.ssh/seaweedfs_telemetry_deploy (private key) +# ~/.ssh/seaweedfs_telemetry_deploy.pub (public key) +``` + +### Step 2: Configure Remote Server + +Copy the public key to your remote server: + +```bash +# Copy public key to remote server +ssh-copy-id -i ~/.ssh/seaweedfs_telemetry_deploy.pub user@your-server.com + +# Or manually append to authorized_keys +cat ~/.ssh/seaweedfs_telemetry_deploy.pub | ssh user@your-server.com "mkdir -p ~/.ssh && cat >> ~/.ssh/authorized_keys" +``` + +Test the SSH connection: + +```bash +# Test SSH connection with the new key +ssh -i ~/.ssh/seaweedfs_telemetry_deploy user@your-server.com "echo 'SSH connection successful'" +``` + +### Step 3: Add Secrets to GitHub Repository + +1. Go to your GitHub repository +2. Click on **Settings** tab +3. In the sidebar, click **Secrets and variables** → **Actions** +4. Click **New repository secret** for each of the following: + +#### TELEMETRY_SSH_PRIVATE_KEY + +```bash +# Display the private key content +cat ~/.ssh/seaweedfs_telemetry_deploy +``` + +- **Name**: `TELEMETRY_SSH_PRIVATE_KEY` +- **Value**: Copy the entire private key content, including the `-----BEGIN OPENSSH PRIVATE KEY-----` and `-----END OPENSSH PRIVATE KEY-----` lines + +#### TELEMETRY_HOST + +- **Name**: `TELEMETRY_HOST` +- **Value**: Your server's hostname or IP address (e.g., `telemetry.example.com` or `192.168.1.100`) + +#### TELEMETRY_USER + +- **Name**: `TELEMETRY_USER` +- **Value**: The username on the remote server (e.g., `ubuntu`, `deploy`, or your username) + +### Step 4: Verify Configuration + +Create a simple test workflow or manually trigger the deployment to verify the secrets are working correctly. + +### Security Best Practices + +1. **Dedicated SSH Key**: Use a separate SSH key only for deployment +2. **Limited Permissions**: Create a dedicated user on the remote server with minimal required permissions +3. **Key Rotation**: Regularly rotate SSH keys +4. **Server Access**: Restrict SSH access to specific IP ranges if possible + +### Example Server Setup + +If you're setting up a new server, here's a basic configuration: + +```bash +# On the remote server, create a dedicated user for deployment +sudo useradd -m -s /bin/bash seaweedfs-deploy +sudo usermod -aG sudo seaweedfs-deploy # Only if sudo access is needed + +# Switch to the deployment user +sudo su - seaweedfs-deploy + +# Create SSH directory +mkdir -p ~/.ssh +chmod 700 ~/.ssh + +# Add your public key (paste the content of seaweedfs_telemetry_deploy.pub) +nano ~/.ssh/authorized_keys +chmod 600 ~/.ssh/authorized_keys +``` + +### Troubleshooting + +#### SSH Connection Issues + +```bash +# Test SSH connection manually +ssh -i ~/.ssh/seaweedfs_telemetry_deploy -v user@your-server.com + +# Check SSH key permissions +ls -la ~/.ssh/seaweedfs_telemetry_deploy* +# Should show: -rw------- for private key, -rw-r--r-- for public key +``` + +#### GitHub Actions Fails + +1. **Check secrets**: Ensure all three secrets are properly set in GitHub +2. **Verify SSH key**: Make sure the entire private key (including headers/footers) is copied +3. **Test connectivity**: Manually SSH to the server from your local machine +4. **Check user permissions**: Ensure the remote user has necessary permissions + +## GitHub Actions Workflow + +The deployment workflow (`.github/workflows/deploy_telemetry.yml`) provides two main operations: + +### 1. First-time Setup + +Run this once to set up the remote server: + +1. Go to GitHub Actions in your repository +2. Select "Deploy Telemetry Server" workflow +3. Click "Run workflow" +4. Check "Run first-time server setup" +5. Click "Run workflow" + +This will: +- Create necessary directories on the remote server +- Set up systemd service configuration +- Configure log rotation +- Upload Grafana dashboard and Prometheus configuration + + +### 2. Deploy Updates + +Deployments happen automatically when: +- Code is pushed to the `master` branch with changes in the `telemetry/` directory + +Or manually trigger deployment: +1. Go to GitHub Actions in your repository +2. Select "Deploy Telemetry Server" workflow +3. Click "Run workflow" +4. Check "Deploy telemetry server to remote server" +5. Click "Run workflow" + +## Server Directory Structure + +After setup, the remote server will have: + +``` +~/seaweedfs-telemetry/ +├── bin/ +│ └── telemetry-server # Binary executable +├── logs/ +│ ├── telemetry.log # Application logs +│ └── telemetry.error.log # Error logs +├── data/ # Data directory (if needed) +├── grafana-dashboard.json # Grafana dashboard configuration +└── prometheus.yml # Prometheus configuration +``` + +## Service Management + +The telemetry server runs as a systemd service: + +```bash +# Check service status +sudo systemctl status telemetry.service + +# View logs +sudo journalctl -u telemetry.service -f + +# Restart service +sudo systemctl restart telemetry.service + +# Stop/start service +sudo systemctl stop telemetry.service +sudo systemctl start telemetry.service +``` + +## Accessing the Service + +After deployment, the telemetry server will be available at: + +- **Dashboard**: `http://your-server:8353` +- **API**: `http://your-server:8353/api/*` +- **Metrics**: `http://your-server:8353/metrics` +- **Health Check**: `http://your-server:8353/health` + +## Optional: Prometheus and Grafana Integration + +### Prometheus Setup + +1. Install Prometheus on your server +2. Update `/etc/prometheus/prometheus.yml` to include: + ```yaml + scrape_configs: + - job_name: 'seaweedfs-telemetry' + static_configs: + - targets: ['localhost:8353'] + metrics_path: '/metrics' + ``` + +### Grafana Setup + +1. Install Grafana on your server +2. Import the dashboard from `~/seaweedfs-telemetry/grafana-dashboard.json` +3. Configure Prometheus as a data source pointing to your Prometheus instance + +## Troubleshooting + +### Deployment Fails + +1. Check GitHub Actions logs for detailed error messages +2. Verify SSH connectivity: `ssh user@host` +3. Ensure all required secrets are configured in GitHub + +### Service Won't Start + +1. Check service logs: `sudo journalctl -u telemetry.service` +2. Verify binary permissions: `ls -la ~/seaweedfs-telemetry/bin/` +3. Test binary manually: `~/seaweedfs-telemetry/bin/telemetry-server -help` + +### Port Conflicts + +If port 8353 is already in use: + +1. Edit the systemd service: `sudo systemctl edit telemetry.service` +2. Add override configuration: + ```ini + [Service] + ExecStart= + ExecStart=/home/user/seaweedfs-telemetry/bin/telemetry-server -port=8354 + ``` +3. Reload and restart: `sudo systemctl daemon-reload && sudo systemctl restart telemetry.service` + +## Security Considerations + +1. **Firewall**: Consider restricting access to telemetry ports +2. **SSH Keys**: Use dedicated SSH keys with minimal permissions +3. **User Permissions**: Run the service as a non-privileged user +4. **Network**: Consider running on internal networks only + +## Monitoring + +Monitor the deployment and service health: + +- **GitHub Actions**: Check workflow runs for deployment status +- **System Logs**: `sudo journalctl -u telemetry.service` +- **Application Logs**: `tail -f ~/seaweedfs-telemetry/logs/telemetry.log` +- **Health Endpoint**: `curl http://localhost:8353/health` +- **Metrics**: `curl http://localhost:8353/metrics` \ No newline at end of file diff --git a/telemetry/README.md b/telemetry/README.md new file mode 100644 index 000000000..aee050943 --- /dev/null +++ b/telemetry/README.md @@ -0,0 +1,351 @@ +# SeaweedFS Telemetry System + +A privacy-respecting telemetry system for SeaweedFS that collects cluster-level usage statistics and provides visualization through Prometheus and Grafana. + +## Features + +- **Privacy-First Design**: Uses in-memory cluster IDs (regenerated on restart), no personal data collection +- **Prometheus Integration**: Native Prometheus metrics for monitoring and alerting +- **Grafana Dashboards**: Pre-built dashboards for data visualization +- **Protocol Buffers**: Efficient binary data transmission for optimal performance +- **Opt-in Only**: Disabled by default, requires explicit configuration +- **Docker Compose**: Complete monitoring stack deployment +- **Automatic Cleanup**: Configurable data retention policies + +## Architecture + +``` +SeaweedFS Cluster → Telemetry Client → Telemetry Server → Prometheus → Grafana + (protobuf) (metrics) (queries) +``` + +## Data Transmission + +The telemetry system uses **Protocol Buffers exclusively** for efficient binary data transmission: + +- **Compact Format**: 30-50% smaller than JSON +- **Fast Serialization**: Better performance than text-based formats +- **Type Safety**: Strong typing with generated Go structs +- **Schema Evolution**: Built-in versioning support + +### Protobuf Schema + +```protobuf +message TelemetryData { + string cluster_id = 1; // In-memory generated UUID + string version = 2; // SeaweedFS version + string os = 3; // Operating system + repeated string features = 4; // Enabled features + string deployment = 5; // Deployment type + int32 volume_server_count = 6; // Number of volume servers + uint64 total_disk_bytes = 7; // Total disk usage + int32 total_volume_count = 8; // Total volume count + int64 timestamp = 9; // Collection timestamp +} +``` + +## Privacy Approach + +- **No Personal Data**: No hostnames, IP addresses, or user information +- **In-Memory IDs**: Cluster IDs are generated in-memory and change on restart +- **Aggregated Data**: Only cluster-level statistics, no individual file/user data +- **Opt-in Only**: Telemetry is disabled by default +- **Transparent**: Open source implementation, clear data collection policy + +## Collected Data + +| Field | Description | Example | +|-------|-------------|---------| +| `cluster_id` | In-memory UUID (changes on restart) | `a1b2c3d4-...` | +| `version` | SeaweedFS version | `3.45` | +| `os` | Operating system and architecture | `linux/amd64` | +| `features` | Enabled components | `["filer", "s3api"]` | +| `deployment` | Deployment type | `cluster` | +| `volume_server_count` | Number of volume servers | `5` | +| `total_disk_bytes` | Total disk usage across cluster | `1073741824` | +| `total_volume_count` | Total number of volumes | `120` | +| `timestamp` | When data was collected | `1640995200` | + +## Quick Start + +### 1. Deploy Telemetry Server + +```bash +# Clone and start the complete monitoring stack +git clone https://github.com/seaweedfs/seaweedfs.git +cd seaweedfs/telemetry +docker-compose up -d + +# Or run the server directly +cd server +go run . -port=8080 -dashboard=true +``` + +### 2. Configure SeaweedFS + +```bash +# Enable telemetry in SeaweedFS master (uses default telemetry.seaweedfs.com:3091) +weed master -telemetry=true + +# Or in server mode +weed server -telemetry=true + +# Or specify custom telemetry server +weed master -telemetry=true -telemetry.url=http://localhost:8080/api/collect +``` + +### 3. Access Dashboards + +- **Telemetry Server**: http://localhost:8080 +- **Prometheus**: http://localhost:9090 +- **Grafana**: http://localhost:3000 (admin/admin) + +## Configuration + +### SeaweedFS Master/Server + +```bash +# Enable telemetry +-telemetry=true + +# Set custom telemetry server URL (optional, defaults to telemetry.seaweedfs.com:3091) +-telemetry.url=http://your-telemetry-server:8080/api/collect +``` + +### Telemetry Server + +```bash +# Server configuration +-port=8080 # Server port +-dashboard=true # Enable built-in dashboard +-cleanup=24h # Cleanup interval +-max-age=720h # Maximum data retention (30 days) + +# Example +./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h +``` + +## Prometheus Metrics + +The telemetry server exposes these Prometheus metrics: + +### Cluster Metrics +- `seaweedfs_telemetry_total_clusters`: Total unique clusters (30 days) +- `seaweedfs_telemetry_active_clusters`: Active clusters (7 days) + +### Per-Cluster Metrics +- `seaweedfs_telemetry_volume_servers{cluster_id, version, os, deployment}`: Volume servers per cluster +- `seaweedfs_telemetry_disk_bytes{cluster_id, version, os, deployment}`: Disk usage per cluster +- `seaweedfs_telemetry_volume_count{cluster_id, version, os, deployment}`: Volume count per cluster +- `seaweedfs_telemetry_filer_count{cluster_id, version, os, deployment}`: Filer servers per cluster +- `seaweedfs_telemetry_broker_count{cluster_id, version, os, deployment}`: Broker servers per cluster +- `seaweedfs_telemetry_cluster_info{cluster_id, version, os, deployment, features}`: Cluster metadata + +### Server Metrics +- `seaweedfs_telemetry_reports_received_total`: Total telemetry reports received + +## API Endpoints + +### Data Collection +```bash +# Submit telemetry data (protobuf only) +POST /api/collect +Content-Type: application/x-protobuf +[TelemetryRequest protobuf data] +``` + +### Statistics (JSON for dashboard/debugging) +```bash +# Get aggregated statistics +GET /api/stats + +# Get recent cluster instances +GET /api/instances?limit=100 + +# Get metrics over time +GET /api/metrics?days=30 +``` + +### Monitoring +```bash +# Prometheus metrics +GET /metrics +``` + +## Docker Deployment + +### Complete Stack (Recommended) + +```yaml +# docker-compose.yml +version: '3.8' +services: + telemetry-server: + build: ./server + ports: + - "8080:8080" + command: ["-port=8080", "-dashboard=true", "-cleanup=24h"] + + prometheus: + image: prom/prometheus:latest + ports: + - "9090:9090" + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml + + grafana: + image: grafana/grafana:latest + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + volumes: + - ./grafana-provisioning:/etc/grafana/provisioning + - ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs.json +``` + +```bash +# Deploy the stack +docker-compose up -d + +# Scale telemetry server if needed +docker-compose up -d --scale telemetry-server=3 +``` + +### Server Only + +```bash +# Build and run telemetry server +cd server +docker build -t seaweedfs-telemetry . +docker run -p 8080:8080 seaweedfs-telemetry -port=8080 -dashboard=true +``` + +## Development + +### Protocol Buffer Development + +```bash +# Generate protobuf code +cd telemetry +protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto + +# The generated code is already included in the repository +``` + +### Build from Source + +```bash +# Build telemetry server +cd telemetry/server +go build -o telemetry-server . + +# Build SeaweedFS with telemetry support +cd ../.. +go build -o weed ./weed +``` + +### Testing + +```bash +# Test telemetry server +cd telemetry/server +go test ./... + +# Test protobuf communication (requires protobuf tools) +# See telemetry client code for examples +``` + +## Grafana Dashboard + +The included Grafana dashboard provides: + +- **Overview**: Total and active clusters, version distribution +- **Resource Usage**: Volume servers and disk usage over time +- **Deployments**: Deployment type and OS distribution +- **Growth Trends**: Historical growth patterns + +### Custom Queries + +```promql +# Total active clusters +seaweedfs_telemetry_active_clusters + +# Disk usage by version +sum by (version) (seaweedfs_telemetry_disk_bytes) + +# Volume servers by deployment type +sum by (deployment) (seaweedfs_telemetry_volume_servers) + +# Filer servers by version +sum by (version) (seaweedfs_telemetry_filer_count) + +# Broker servers across all clusters +sum(seaweedfs_telemetry_broker_count) + +# Growth rate (weekly) +increase(seaweedfs_telemetry_total_clusters[7d]) +``` + +## Security Considerations + +- **Network Security**: Use HTTPS in production environments +- **Access Control**: Implement authentication for Grafana and Prometheus +- **Data Retention**: Configure appropriate retention policies +- **Monitoring**: Monitor the telemetry infrastructure itself + +## Troubleshooting + +### Common Issues + +**SeaweedFS not sending data:** +```bash +# Check telemetry configuration +weed master -h | grep telemetry + +# Verify connectivity +curl -v http://your-telemetry-server:8080/api/collect +``` + +**Server not receiving data:** +```bash +# Check server logs +docker-compose logs telemetry-server + +# Verify metrics endpoint +curl http://localhost:8080/metrics +``` + +**Prometheus not scraping:** +```bash +# Check Prometheus targets +curl http://localhost:9090/api/v1/targets + +# Verify configuration +docker-compose logs prometheus +``` + +### Debugging + +```bash +# Enable verbose logging in SeaweedFS +weed master -v=2 -telemetry=true + +# Check telemetry server metrics +curl http://localhost:8080/metrics | grep seaweedfs_telemetry + +# Test data flow +curl http://localhost:8080/api/stats +``` + +## Contributing + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. Add tests if applicable +5. Submit a pull request + +## License + +This telemetry system is part of SeaweedFS and follows the same Apache 2.0 license. \ No newline at end of file diff --git a/telemetry/docker-compose.yml b/telemetry/docker-compose.yml new file mode 100644 index 000000000..73f0e8f70 --- /dev/null +++ b/telemetry/docker-compose.yml @@ -0,0 +1,55 @@ +version: '3.8' + +services: + telemetry-server: + build: ./server + ports: + - "8080:8080" + command: [ + "./telemetry-server", + "-port=8080", + "-dashboard=false", # Disable built-in dashboard, use Grafana + "-log=true", + "-cors=true" + ] + networks: + - telemetry + + prometheus: + image: prom/prometheus:latest + ports: + - "9090:9090" + volumes: + - ./prometheus.yml:/etc/prometheus/prometheus.yml + - prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/etc/prometheus/console_libraries' + - '--web.console.templates=/etc/prometheus/consoles' + - '--storage.tsdb.retention.time=200h' + - '--web.enable-lifecycle' + networks: + - telemetry + + grafana: + image: grafana/grafana:latest + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin + - GF_USERS_ALLOW_SIGN_UP=false + volumes: + - grafana_data:/var/lib/grafana + - ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs-telemetry.json + - ./grafana-provisioning:/etc/grafana/provisioning + networks: + - telemetry + +volumes: + prometheus_data: + grafana_data: + +networks: + telemetry: + driver: bridge \ No newline at end of file diff --git a/telemetry/grafana-dashboard.json b/telemetry/grafana-dashboard.json new file mode 100644 index 000000000..c33896dab --- /dev/null +++ b/telemetry/grafana-dashboard.json @@ -0,0 +1,734 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "showHeader": true + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "seaweedfs_telemetry_total_clusters", + "format": "time_series", + "refId": "A" + } + ], + "title": "Total SeaweedFS Clusters", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "showHeader": true + }, + "pluginVersion": "10.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "seaweedfs_telemetry_active_clusters", + "format": "time_series", + "refId": "A" + } + ], + "title": "Active Clusters (7 days)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "displayMode": "visible", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "count by (version) (seaweedfs_telemetry_cluster_info)", + "format": "time_series", + "legendFormat": "{{version}}", + "refId": "A" + } + ], + "title": "SeaweedFS Version Distribution", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 4, + "options": { + "legend": { + "displayMode": "visible", + "placement": "bottom", + "showLegend": true + }, + "pieType": "pie", + "reduceOptions": { + "values": false, + "calcs": [ + "lastNotNull" + ], + "fields": "" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "count by (os) (seaweedfs_telemetry_cluster_info)", + "format": "time_series", + "legendFormat": "{{os}}", + "refId": "A" + } + ], + "title": "Operating System Distribution", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(seaweedfs_telemetry_volume_servers)", + "format": "time_series", + "legendFormat": "Total Volume Servers", + "refId": "A" + } + ], + "title": "Total Volume Servers Over Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 24 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(seaweedfs_telemetry_disk_bytes)", + "format": "time_series", + "legendFormat": "Total Disk Usage", + "refId": "A" + } + ], + "title": "Total Disk Usage Over Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(seaweedfs_telemetry_volume_count)", + "format": "time_series", + "legendFormat": "Total Volume Count", + "refId": "A" + } + ], + "title": "Total Volume Count Over Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(seaweedfs_telemetry_filer_count)", + "format": "time_series", + "legendFormat": "Total Filer Count", + "refId": "A" + } + ], + "title": "Total Filer Servers Over Time", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(seaweedfs_telemetry_broker_count)", + "format": "time_series", + "legendFormat": "Total Broker Count", + "refId": "A" + } + ], + "title": "Total Broker Servers Over Time", + "type": "timeseries" + } + ], + "refresh": "5m", + "schemaVersion": 38, + "style": "dark", + "tags": [ + "seaweedfs", + "telemetry" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "SeaweedFS Telemetry Dashboard", + "uid": "seaweedfs-telemetry", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/telemetry/grafana-provisioning/dashboards/dashboards.yml b/telemetry/grafana-provisioning/dashboards/dashboards.yml new file mode 100644 index 000000000..82fd18a7a --- /dev/null +++ b/telemetry/grafana-provisioning/dashboards/dashboards.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: 'seaweedfs' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards \ No newline at end of file diff --git a/telemetry/grafana-provisioning/datasources/prometheus.yml b/telemetry/grafana-provisioning/datasources/prometheus.yml new file mode 100644 index 000000000..38fb02c68 --- /dev/null +++ b/telemetry/grafana-provisioning/datasources/prometheus.yml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://prometheus:9090 + isDefault: true + editable: true \ No newline at end of file diff --git a/telemetry/prometheus.yml b/telemetry/prometheus.yml new file mode 100644 index 000000000..e33d518e7 --- /dev/null +++ b/telemetry/prometheus.yml @@ -0,0 +1,15 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +rule_files: + # - "first_rules.yml" + # - "second_rules.yml" + +scrape_configs: + - job_name: 'seaweedfs-telemetry' + static_configs: + - targets: ['telemetry-server:8080'] + scrape_interval: 30s + metrics_path: '/metrics' + scrape_timeout: 10s \ No newline at end of file diff --git a/telemetry/proto/telemetry.pb.go b/telemetry/proto/telemetry.pb.go new file mode 100644 index 000000000..b1bf44db7 --- /dev/null +++ b/telemetry/proto/telemetry.pb.go @@ -0,0 +1,398 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.34.2 +// protoc v5.29.3 +// source: proto/telemetry.proto + +package proto + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// TelemetryData represents cluster-level telemetry information +type TelemetryData struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Unique cluster identifier (generated in-memory) + ClusterId string `protobuf:"bytes,1,opt,name=cluster_id,json=clusterId,proto3" json:"cluster_id,omitempty"` + // SeaweedFS version + Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` + // Operating system (e.g., "linux/amd64") + Os string `protobuf:"bytes,3,opt,name=os,proto3" json:"os,omitempty"` + // Enabled features (e.g., ["filer", "s3api", "mq"]) + Features []string `protobuf:"bytes,4,rep,name=features,proto3" json:"features,omitempty"` + // Deployment type ("standalone", "cluster", "master-only", "volume-only") + Deployment string `protobuf:"bytes,5,opt,name=deployment,proto3" json:"deployment,omitempty"` + // Number of volume servers in the cluster + VolumeServerCount int32 `protobuf:"varint,6,opt,name=volume_server_count,json=volumeServerCount,proto3" json:"volume_server_count,omitempty"` + // Total disk usage across all volume servers (in bytes) + TotalDiskBytes uint64 `protobuf:"varint,7,opt,name=total_disk_bytes,json=totalDiskBytes,proto3" json:"total_disk_bytes,omitempty"` + // Total number of volumes in the cluster + TotalVolumeCount int32 `protobuf:"varint,8,opt,name=total_volume_count,json=totalVolumeCount,proto3" json:"total_volume_count,omitempty"` + // Number of filer servers in the cluster + FilerCount int32 `protobuf:"varint,9,opt,name=filer_count,json=filerCount,proto3" json:"filer_count,omitempty"` + // Number of broker servers in the cluster + BrokerCount int32 `protobuf:"varint,10,opt,name=broker_count,json=brokerCount,proto3" json:"broker_count,omitempty"` + // Unix timestamp when the data was collected + Timestamp int64 `protobuf:"varint,11,opt,name=timestamp,proto3" json:"timestamp,omitempty"` +} + +func (x *TelemetryData) Reset() { + *x = TelemetryData{} + if protoimpl.UnsafeEnabled { + mi := &file_proto_telemetry_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *TelemetryData) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*TelemetryData) ProtoMessage() {} + +func (x *TelemetryData) ProtoReflect() protoreflect.Message { + mi := &file_proto_telemetry_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use TelemetryData.ProtoReflect.Descriptor instead. +func (*TelemetryData) Descriptor() ([]byte, []int) { + return file_proto_telemetry_proto_rawDescGZIP(), []int{0} +} + +func (x *TelemetryData) GetClusterId() string { + if x != nil { + return x.ClusterId + } + return "" +} + +func (x *TelemetryData) GetVersion() string { + if x != nil { + return x.Version + } + return "" +} + +func (x *TelemetryData) GetOs() string { + if x != nil { + return x.Os + } + return "" +} + +func (x *TelemetryData) GetFeatures() []string { + if x != nil { + return x.Features + } + return nil +} + +func (x *TelemetryData) GetDeployment() string { + if x != nil { + return x.Deployment + } + return "" +} + +func (x *TelemetryData) GetVolumeServerCount() int32 { + if x != nil { + return x.VolumeServerCount + } + return 0 +} + +func (x *TelemetryData) GetTotalDiskBytes() uint64 { + if x != nil { + return x.TotalDiskBytes + } + return 0 +} + +func (x *TelemetryData) GetTotalVolumeCount() int32 { + if x != nil { + return x.TotalVolumeCount + } + return 0 +} + +func (x *TelemetryData) GetFilerCount() int32 { + if x != nil { + return x.FilerCount + } + return 0 +} + +func (x *TelemetryData) GetBrokerCount() int32 { + if x != nil { + return x.BrokerCount + } + return 0 +} + +func (x *TelemetryData) GetTimestamp() int64 { + if x != nil { + return x.Timestamp + } + return 0 +} + +// TelemetryRequest is sent from SeaweedFS clusters to the telemetry server +type TelemetryRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Data *TelemetryData `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` +} + +func (x *TelemetryRequest) Reset() { + *x = TelemetryRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_proto_telemetry_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *TelemetryRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*TelemetryRequest) ProtoMessage() {} + +func (x *TelemetryRequest) ProtoReflect() protoreflect.Message { + mi := &file_proto_telemetry_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use TelemetryRequest.ProtoReflect.Descriptor instead. +func (*TelemetryRequest) Descriptor() ([]byte, []int) { + return file_proto_telemetry_proto_rawDescGZIP(), []int{1} +} + +func (x *TelemetryRequest) GetData() *TelemetryData { + if x != nil { + return x.Data + } + return nil +} + +// TelemetryResponse is returned by the telemetry server +type TelemetryResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"` + Message string `protobuf:"bytes,2,opt,name=message,proto3" json:"message,omitempty"` +} + +func (x *TelemetryResponse) Reset() { + *x = TelemetryResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_proto_telemetry_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *TelemetryResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*TelemetryResponse) ProtoMessage() {} + +func (x *TelemetryResponse) ProtoReflect() protoreflect.Message { + mi := &file_proto_telemetry_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use TelemetryResponse.ProtoReflect.Descriptor instead. +func (*TelemetryResponse) Descriptor() ([]byte, []int) { + return file_proto_telemetry_proto_rawDescGZIP(), []int{2} +} + +func (x *TelemetryResponse) GetSuccess() bool { + if x != nil { + return x.Success + } + return false +} + +func (x *TelemetryResponse) GetMessage() string { + if x != nil { + return x.Message + } + return "" +} + +var File_proto_telemetry_proto protoreflect.FileDescriptor + +var file_proto_telemetry_proto_rawDesc = []byte{ + 0x0a, 0x15, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, + 0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x09, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, + 0x72, 0x79, 0x22, 0xfe, 0x02, 0x0a, 0x0d, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, + 0x44, 0x61, 0x74, 0x61, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f, + 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, + 0x72, 0x49, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x0e, 0x0a, + 0x02, 0x6f, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x6f, 0x73, 0x12, 0x1a, 0x0a, + 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52, + 0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x64, 0x65, 0x70, + 0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x64, + 0x65, 0x70, 0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x2e, 0x0a, 0x13, 0x76, 0x6f, 0x6c, + 0x75, 0x6d, 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x76, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x53, 0x65, + 0x72, 0x76, 0x65, 0x72, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x28, 0x0a, 0x10, 0x74, 0x6f, 0x74, + 0x61, 0x6c, 0x5f, 0x64, 0x69, 0x73, 0x6b, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x07, 0x20, + 0x01, 0x28, 0x04, 0x52, 0x0e, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x44, 0x69, 0x73, 0x6b, 0x42, 0x79, + 0x74, 0x65, 0x73, 0x12, 0x2c, 0x0a, 0x12, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x76, 0x6f, 0x6c, + 0x75, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x10, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x56, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x43, 0x6f, 0x75, 0x6e, + 0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, + 0x18, 0x09, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x43, 0x6f, 0x75, + 0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, + 0x6e, 0x74, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, + 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, + 0x6d, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, + 0x61, 0x6d, 0x70, 0x22, 0x40, 0x0a, 0x10, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x2c, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, + 0x79, 0x2e, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x44, 0x61, 0x74, 0x61, 0x52, + 0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x47, 0x0a, 0x11, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, + 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75, + 0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63, + 0x63, 0x65, 0x73, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x42, 0x30, + 0x5a, 0x2e, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x65, 0x61, + 0x77, 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x73, 0x65, 0x61, 0x77, 0x65, 0x65, 0x64, 0x66, 0x73, + 0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_proto_telemetry_proto_rawDescOnce sync.Once + file_proto_telemetry_proto_rawDescData = file_proto_telemetry_proto_rawDesc +) + +func file_proto_telemetry_proto_rawDescGZIP() []byte { + file_proto_telemetry_proto_rawDescOnce.Do(func() { + file_proto_telemetry_proto_rawDescData = protoimpl.X.CompressGZIP(file_proto_telemetry_proto_rawDescData) + }) + return file_proto_telemetry_proto_rawDescData +} + +var file_proto_telemetry_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_proto_telemetry_proto_goTypes = []any{ + (*TelemetryData)(nil), // 0: telemetry.TelemetryData + (*TelemetryRequest)(nil), // 1: telemetry.TelemetryRequest + (*TelemetryResponse)(nil), // 2: telemetry.TelemetryResponse +} +var file_proto_telemetry_proto_depIdxs = []int32{ + 0, // 0: telemetry.TelemetryRequest.data:type_name -> telemetry.TelemetryData + 1, // [1:1] is the sub-list for method output_type + 1, // [1:1] is the sub-list for method input_type + 1, // [1:1] is the sub-list for extension type_name + 1, // [1:1] is the sub-list for extension extendee + 0, // [0:1] is the sub-list for field type_name +} + +func init() { file_proto_telemetry_proto_init() } +func file_proto_telemetry_proto_init() { + if File_proto_telemetry_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_proto_telemetry_proto_msgTypes[0].Exporter = func(v any, i int) any { + switch v := v.(*TelemetryData); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_proto_telemetry_proto_msgTypes[1].Exporter = func(v any, i int) any { + switch v := v.(*TelemetryRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_proto_telemetry_proto_msgTypes[2].Exporter = func(v any, i int) any { + switch v := v.(*TelemetryResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_proto_telemetry_proto_rawDesc, + NumEnums: 0, + NumMessages: 3, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_proto_telemetry_proto_goTypes, + DependencyIndexes: file_proto_telemetry_proto_depIdxs, + MessageInfos: file_proto_telemetry_proto_msgTypes, + }.Build() + File_proto_telemetry_proto = out.File + file_proto_telemetry_proto_rawDesc = nil + file_proto_telemetry_proto_goTypes = nil + file_proto_telemetry_proto_depIdxs = nil +} diff --git a/telemetry/proto/telemetry.proto b/telemetry/proto/telemetry.proto new file mode 100644 index 000000000..07bc79446 --- /dev/null +++ b/telemetry/proto/telemetry.proto @@ -0,0 +1,52 @@ +syntax = "proto3"; + +package telemetry; + +option go_package = "github.com/seaweedfs/seaweedfs/telemetry/proto"; + +// TelemetryData represents cluster-level telemetry information +message TelemetryData { + // Unique cluster identifier (generated in-memory) + string cluster_id = 1; + + // SeaweedFS version + string version = 2; + + // Operating system (e.g., "linux/amd64") + string os = 3; + + // Enabled features (e.g., ["filer", "s3api", "mq"]) + repeated string features = 4; + + // Deployment type ("standalone", "cluster", "master-only", "volume-only") + string deployment = 5; + + // Number of volume servers in the cluster + int32 volume_server_count = 6; + + // Total disk usage across all volume servers (in bytes) + uint64 total_disk_bytes = 7; + + // Total number of volumes in the cluster + int32 total_volume_count = 8; + + // Number of filer servers in the cluster + int32 filer_count = 9; + + // Number of broker servers in the cluster + int32 broker_count = 10; + + // Unix timestamp when the data was collected + int64 timestamp = 11; +} + +// TelemetryRequest is sent from SeaweedFS clusters to the telemetry server +message TelemetryRequest { + TelemetryData data = 1; +} + +// TelemetryResponse is returned by the telemetry server +message TelemetryResponse { + bool success = 1; + string message = 2; +} \ No newline at end of file diff --git a/telemetry/server/Dockerfile b/telemetry/server/Dockerfile new file mode 100644 index 000000000..8f3782fcf --- /dev/null +++ b/telemetry/server/Dockerfile @@ -0,0 +1,18 @@ +FROM golang:1.21-alpine AS builder + +WORKDIR /app +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . +RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-extldflags "-static"' -o telemetry-server . + +FROM alpine:latest +RUN apk --no-cache add ca-certificates +WORKDIR /root/ + +COPY --from=builder /app/telemetry-server . + +EXPOSE 8080 + +CMD ["./telemetry-server"] \ No newline at end of file diff --git a/telemetry/server/Makefile b/telemetry/server/Makefile new file mode 100644 index 000000000..cf57f1777 --- /dev/null +++ b/telemetry/server/Makefile @@ -0,0 +1,97 @@ +.PHONY: build run clean test deps proto integration-test test-all + +# Build the telemetry server +build: + go build -o telemetry-server . + +# Run the server in development mode +run: + go run . -port=8080 -dashboard=true -cleanup=1h -max-age=24h + +# Run the server in production mode +run-prod: + ./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h + +# Clean build artifacts +clean: + rm -f telemetry-server + rm -f ../test/telemetry-server-test.log + go clean + +# Run unit tests +test: + go test ./... + +# Run integration tests +integration-test: + @echo "🧪 Running telemetry integration tests..." + cd ../../ && go run telemetry/test/integration.go + +# Run all tests (unit + integration) +test-all: test integration-test + +# Install dependencies +deps: + go mod download + go mod tidy + +# Generate protobuf code (requires protoc) +proto: + cd .. && protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto + +# Build Docker image +docker-build: + docker build -t seaweedfs-telemetry . + +# Run with Docker +docker-run: + docker run -p 8080:8080 seaweedfs-telemetry -port=8080 -dashboard=true + +# Development with auto-reload (requires air: go install github.com/cosmtrek/air@latest) +dev: + air + +# Check if protoc is available +check-protoc: + @which protoc > /dev/null || (echo "protoc is required for proto generation. Install from https://grpc.io/docs/protoc-installation/" && exit 1) + +# Full development setup +setup: check-protoc deps proto build + +# Run a quick smoke test +smoke-test: build + @echo "🔥 Running smoke test..." + @timeout 10s ./telemetry-server -port=18081 > /dev/null 2>&1 & \ + SERVER_PID=$$!; \ + sleep 2; \ + if curl -s http://localhost:18081/health > /dev/null; then \ + echo "✅ Smoke test passed - server responds to health check"; \ + else \ + echo "❌ Smoke test failed - server not responding"; \ + exit 1; \ + fi; \ + kill $$SERVER_PID 2>/dev/null || true + +# Continuous integration target +ci: deps proto build test integration-test + @echo "🎉 All CI tests passed!" + +# Help +help: + @echo "Available targets:" + @echo " build - Build the telemetry server binary" + @echo " run - Run server in development mode" + @echo " run-prod - Run server in production mode" + @echo " clean - Clean build artifacts" + @echo " test - Run unit tests" + @echo " integration-test- Run integration tests" + @echo " test-all - Run all tests (unit + integration)" + @echo " deps - Install Go dependencies" + @echo " proto - Generate protobuf code" + @echo " docker-build - Build Docker image" + @echo " docker-run - Run with Docker" + @echo " dev - Run with auto-reload (requires air)" + @echo " smoke-test - Quick server health check" + @echo " setup - Full development setup" + @echo " ci - Continuous integration (all tests)" + @echo " help - Show this help" \ No newline at end of file diff --git a/telemetry/server/api/handlers.go b/telemetry/server/api/handlers.go new file mode 100644 index 000000000..0ff00330b --- /dev/null +++ b/telemetry/server/api/handlers.go @@ -0,0 +1,152 @@ +package api + +import ( + "encoding/json" + "io" + "net/http" + "strconv" + "time" + + "github.com/seaweedfs/seaweedfs/telemetry/proto" + "github.com/seaweedfs/seaweedfs/telemetry/server/storage" + protobuf "google.golang.org/protobuf/proto" +) + +type Handler struct { + storage *storage.PrometheusStorage +} + +func NewHandler(storage *storage.PrometheusStorage) *Handler { + return &Handler{storage: storage} +} + +func (h *Handler) CollectTelemetry(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + contentType := r.Header.Get("Content-Type") + + // Only accept protobuf content type + if contentType != "application/x-protobuf" && contentType != "application/protobuf" { + http.Error(w, "Content-Type must be application/x-protobuf", http.StatusUnsupportedMediaType) + return + } + + // Read protobuf request + body, err := io.ReadAll(r.Body) + if err != nil { + http.Error(w, "Failed to read request body", http.StatusBadRequest) + return + } + + req := &proto.TelemetryRequest{} + if err := protobuf.Unmarshal(body, req); err != nil { + http.Error(w, "Invalid protobuf data", http.StatusBadRequest) + return + } + + data := req.Data + if data == nil { + http.Error(w, "Missing telemetry data", http.StatusBadRequest) + return + } + + // Validate required fields + if data.ClusterId == "" || data.Version == "" || data.Os == "" { + http.Error(w, "Missing required fields", http.StatusBadRequest) + return + } + + // Set timestamp if not provided + if data.Timestamp == 0 { + data.Timestamp = time.Now().Unix() + } + + // Store the telemetry data + if err := h.storage.StoreTelemetry(data); err != nil { + http.Error(w, "Failed to store data", http.StatusInternalServerError) + return + } + + // Return protobuf response + resp := &proto.TelemetryResponse{ + Success: true, + Message: "Telemetry data received", + } + + respData, err := protobuf.Marshal(resp) + if err != nil { + http.Error(w, "Failed to marshal response", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/x-protobuf") + w.WriteHeader(http.StatusOK) + w.Write(respData) +} + +func (h *Handler) GetStats(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + stats, err := h.storage.GetStats() + if err != nil { + http.Error(w, "Failed to get stats", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(stats) +} + +func (h *Handler) GetInstances(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + limitStr := r.URL.Query().Get("limit") + limit := 100 // default + if limitStr != "" { + if l, err := strconv.Atoi(limitStr); err == nil && l > 0 && l <= 1000 { + limit = l + } + } + + instances, err := h.storage.GetInstances(limit) + if err != nil { + http.Error(w, "Failed to get instances", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(instances) +} + +func (h *Handler) GetMetrics(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + daysStr := r.URL.Query().Get("days") + days := 30 // default + if daysStr != "" { + if d, err := strconv.Atoi(daysStr); err == nil && d > 0 && d <= 365 { + days = d + } + } + + metrics, err := h.storage.GetMetrics(days) + if err != nil { + http.Error(w, "Failed to get metrics", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(metrics) +} diff --git a/telemetry/server/dashboard/dashboard.go b/telemetry/server/dashboard/dashboard.go new file mode 100644 index 000000000..9a56c7f1b --- /dev/null +++ b/telemetry/server/dashboard/dashboard.go @@ -0,0 +1,278 @@ +package dashboard + +import ( + "net/http" +) + +type Handler struct{} + +func NewHandler() *Handler { + return &Handler{} +} + +func (h *Handler) ServeIndex(w http.ResponseWriter, r *http.Request) { + html := ` + +
+ + +Privacy-respecting usage analytics for SeaweedFS
+