mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2025-09-19 20:47:55 +08:00
add telemetry (#6926)
* add telemetry * fix go mod * add default telemetry server url * Update README.md * replace with broker count instead of s3 count * Update telemetry.pb.go * github action to deploy
This commit is contained in:
157
.github/workflows/deploy_telemetry.yml
vendored
Normal file
157
.github/workflows/deploy_telemetry.yml
vendored
Normal file
@@ -0,0 +1,157 @@
|
||||
# This workflow will build and deploy the SeaweedFS telemetry server
|
||||
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go
|
||||
|
||||
name: Deploy Telemetry Server
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "master" ]
|
||||
paths:
|
||||
- 'telemetry/**'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
setup:
|
||||
description: 'Run first-time server setup'
|
||||
required: true
|
||||
type: boolean
|
||||
default: false
|
||||
deploy:
|
||||
description: 'Deploy telemetry server to remote server'
|
||||
required: true
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v4
|
||||
with:
|
||||
go-version: '1.24'
|
||||
|
||||
- name: Build Telemetry Server
|
||||
run: |
|
||||
go mod tidy
|
||||
cd telemetry/server
|
||||
GOOS=linux GOARCH=amd64 go build -o telemetry-server main.go
|
||||
|
||||
- name: First-time Server Setup
|
||||
if: github.event_name == 'workflow_dispatch' && inputs.setup
|
||||
env:
|
||||
SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }}
|
||||
REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }}
|
||||
REMOTE_USER: ${{ secrets.TELEMETRY_USER }}
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
|
||||
chmod 600 ~/.ssh/deploy_key
|
||||
echo "Host *" > ~/.ssh/config
|
||||
echo " StrictHostKeyChecking no" >> ~/.ssh/config
|
||||
|
||||
# Create all required directories with proper permissions
|
||||
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
|
||||
mkdir -p ~/seaweedfs-telemetry/bin ~/seaweedfs-telemetry/logs ~/seaweedfs-telemetry/data ~/seaweedfs-telemetry/tmp && \
|
||||
chmod 755 ~/seaweedfs-telemetry/logs && \
|
||||
chmod 755 ~/seaweedfs-telemetry/data && \
|
||||
touch ~/seaweedfs-telemetry/logs/telemetry.log ~/seaweedfs-telemetry/logs/telemetry.error.log && \
|
||||
chmod 644 ~/seaweedfs-telemetry/logs/*.log"
|
||||
|
||||
# Create systemd service file
|
||||
echo "
|
||||
[Unit]
|
||||
Description=SeaweedFS Telemetry Server
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=$REMOTE_USER
|
||||
WorkingDirectory=/home/$REMOTE_USER/seaweedfs-telemetry
|
||||
ExecStart=/home/$REMOTE_USER/seaweedfs-telemetry/bin/telemetry-server -port=8353
|
||||
Restart=always
|
||||
RestartSec=5
|
||||
StandardOutput=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.log
|
||||
StandardError=append:/home/$REMOTE_USER/seaweedfs-telemetry/logs/telemetry.error.log
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target" > telemetry.service
|
||||
|
||||
# Setup logrotate configuration
|
||||
echo "# SeaweedFS Telemetry service log rotation
|
||||
/home/$REMOTE_USER/seaweedfs-telemetry/logs/*.log {
|
||||
daily
|
||||
rotate 30
|
||||
compress
|
||||
delaycompress
|
||||
missingok
|
||||
notifempty
|
||||
create 644 $REMOTE_USER $REMOTE_USER
|
||||
postrotate
|
||||
systemctl restart telemetry.service
|
||||
endscript
|
||||
}" > telemetry_logrotate
|
||||
|
||||
# Copy Grafana dashboard and Prometheus config
|
||||
scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
|
||||
scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
|
||||
|
||||
# Copy and install service and logrotate files
|
||||
scp -i ~/.ssh/deploy_key telemetry.service telemetry_logrotate $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
|
||||
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
|
||||
sudo mv ~/seaweedfs-telemetry/telemetry.service /etc/systemd/system/ && \
|
||||
sudo mv ~/seaweedfs-telemetry/telemetry_logrotate /etc/logrotate.d/seaweedfs-telemetry && \
|
||||
sudo systemctl daemon-reload && \
|
||||
sudo systemctl enable telemetry.service"
|
||||
|
||||
rm -f ~/.ssh/deploy_key
|
||||
|
||||
- name: Deploy Telemetry Server to Remote Server
|
||||
if: (github.event_name == 'push' && contains(github.ref, 'refs/heads/master')) || (github.event_name == 'workflow_dispatch' && inputs.deploy)
|
||||
env:
|
||||
SSH_PRIVATE_KEY: ${{ secrets.TELEMETRY_SSH_PRIVATE_KEY }}
|
||||
REMOTE_HOST: ${{ secrets.TELEMETRY_HOST }}
|
||||
REMOTE_USER: ${{ secrets.TELEMETRY_USER }}
|
||||
run: |
|
||||
mkdir -p ~/.ssh
|
||||
echo "$SSH_PRIVATE_KEY" > ~/.ssh/deploy_key
|
||||
chmod 600 ~/.ssh/deploy_key
|
||||
echo "Host *" > ~/.ssh/config
|
||||
echo " StrictHostKeyChecking no" >> ~/.ssh/config
|
||||
|
||||
# Create temp directory and copy binary
|
||||
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "mkdir -p ~/seaweedfs-telemetry/tmp"
|
||||
scp -i ~/.ssh/deploy_key telemetry/server/telemetry-server $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/tmp/
|
||||
|
||||
# Copy updated configuration files
|
||||
scp -i ~/.ssh/deploy_key telemetry/grafana-dashboard.json $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
|
||||
scp -i ~/.ssh/deploy_key telemetry/prometheus.yml $REMOTE_USER@$REMOTE_HOST:~/seaweedfs-telemetry/
|
||||
|
||||
# Stop service, move binary, and restart
|
||||
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
|
||||
sudo systemctl stop telemetry.service || true && \
|
||||
mkdir -p ~/seaweedfs-telemetry/bin && \
|
||||
mv ~/seaweedfs-telemetry/tmp/telemetry-server ~/seaweedfs-telemetry/bin/ && \
|
||||
chmod +x ~/seaweedfs-telemetry/bin/telemetry-server && \
|
||||
sudo systemctl start telemetry.service && \
|
||||
sudo systemctl status telemetry.service"
|
||||
|
||||
# Verify deployment
|
||||
ssh -i ~/.ssh/deploy_key $REMOTE_USER@$REMOTE_HOST "
|
||||
echo 'Waiting for service to start...'
|
||||
sleep 5
|
||||
curl -f http://localhost:8353/health || echo 'Health check failed'"
|
||||
|
||||
rm -f ~/.ssh/deploy_key
|
||||
|
||||
- name: Notify Deployment Status
|
||||
if: always()
|
||||
run: |
|
||||
if [ "${{ job.status }}" == "success" ]; then
|
||||
echo "✅ Telemetry server deployment successful"
|
||||
echo "Dashboard: http://${{ secrets.TELEMETRY_HOST }}:8353"
|
||||
echo "Metrics: http://${{ secrets.TELEMETRY_HOST }}:8353/metrics"
|
||||
else
|
||||
echo "❌ Telemetry server deployment failed"
|
||||
fi
|
271
telemetry/DEPLOYMENT.md
Normal file
271
telemetry/DEPLOYMENT.md
Normal file
@@ -0,0 +1,271 @@
|
||||
# SeaweedFS Telemetry Server Deployment
|
||||
|
||||
This document describes how to deploy the SeaweedFS telemetry server to a remote server using GitHub Actions.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. A remote Linux server with:
|
||||
- SSH access
|
||||
- systemd (for service management)
|
||||
- Optional: Prometheus and Grafana (for monitoring)
|
||||
|
||||
2. GitHub repository secrets configured (see [Setup GitHub Secrets](#setup-github-secrets) below):
|
||||
- `TELEMETRY_SSH_PRIVATE_KEY`: SSH private key for accessing the remote server
|
||||
- `TELEMETRY_HOST`: Remote server hostname or IP address
|
||||
- `TELEMETRY_USER`: Username for SSH access
|
||||
|
||||
## Setup GitHub Secrets
|
||||
|
||||
Before using the deployment workflow, you need to configure the required secrets in your GitHub repository.
|
||||
|
||||
### Step 1: Generate SSH Key Pair
|
||||
|
||||
On your local machine, generate a new SSH key pair specifically for deployment:
|
||||
|
||||
```bash
|
||||
# Generate a new SSH key pair
|
||||
ssh-keygen -t ed25519 -C "seaweedfs-telemetry-deploy" -f ~/.ssh/seaweedfs_telemetry_deploy
|
||||
|
||||
# This creates two files:
|
||||
# ~/.ssh/seaweedfs_telemetry_deploy (private key)
|
||||
# ~/.ssh/seaweedfs_telemetry_deploy.pub (public key)
|
||||
```
|
||||
|
||||
### Step 2: Configure Remote Server
|
||||
|
||||
Copy the public key to your remote server:
|
||||
|
||||
```bash
|
||||
# Copy public key to remote server
|
||||
ssh-copy-id -i ~/.ssh/seaweedfs_telemetry_deploy.pub user@your-server.com
|
||||
|
||||
# Or manually append to authorized_keys
|
||||
cat ~/.ssh/seaweedfs_telemetry_deploy.pub | ssh user@your-server.com "mkdir -p ~/.ssh && cat >> ~/.ssh/authorized_keys"
|
||||
```
|
||||
|
||||
Test the SSH connection:
|
||||
|
||||
```bash
|
||||
# Test SSH connection with the new key
|
||||
ssh -i ~/.ssh/seaweedfs_telemetry_deploy user@your-server.com "echo 'SSH connection successful'"
|
||||
```
|
||||
|
||||
### Step 3: Add Secrets to GitHub Repository
|
||||
|
||||
1. Go to your GitHub repository
|
||||
2. Click on **Settings** tab
|
||||
3. In the sidebar, click **Secrets and variables** → **Actions**
|
||||
4. Click **New repository secret** for each of the following:
|
||||
|
||||
#### TELEMETRY_SSH_PRIVATE_KEY
|
||||
|
||||
```bash
|
||||
# Display the private key content
|
||||
cat ~/.ssh/seaweedfs_telemetry_deploy
|
||||
```
|
||||
|
||||
- **Name**: `TELEMETRY_SSH_PRIVATE_KEY`
|
||||
- **Value**: Copy the entire private key content, including the `-----BEGIN OPENSSH PRIVATE KEY-----` and `-----END OPENSSH PRIVATE KEY-----` lines
|
||||
|
||||
#### TELEMETRY_HOST
|
||||
|
||||
- **Name**: `TELEMETRY_HOST`
|
||||
- **Value**: Your server's hostname or IP address (e.g., `telemetry.example.com` or `192.168.1.100`)
|
||||
|
||||
#### TELEMETRY_USER
|
||||
|
||||
- **Name**: `TELEMETRY_USER`
|
||||
- **Value**: The username on the remote server (e.g., `ubuntu`, `deploy`, or your username)
|
||||
|
||||
### Step 4: Verify Configuration
|
||||
|
||||
Create a simple test workflow or manually trigger the deployment to verify the secrets are working correctly.
|
||||
|
||||
### Security Best Practices
|
||||
|
||||
1. **Dedicated SSH Key**: Use a separate SSH key only for deployment
|
||||
2. **Limited Permissions**: Create a dedicated user on the remote server with minimal required permissions
|
||||
3. **Key Rotation**: Regularly rotate SSH keys
|
||||
4. **Server Access**: Restrict SSH access to specific IP ranges if possible
|
||||
|
||||
### Example Server Setup
|
||||
|
||||
If you're setting up a new server, here's a basic configuration:
|
||||
|
||||
```bash
|
||||
# On the remote server, create a dedicated user for deployment
|
||||
sudo useradd -m -s /bin/bash seaweedfs-deploy
|
||||
sudo usermod -aG sudo seaweedfs-deploy # Only if sudo access is needed
|
||||
|
||||
# Switch to the deployment user
|
||||
sudo su - seaweedfs-deploy
|
||||
|
||||
# Create SSH directory
|
||||
mkdir -p ~/.ssh
|
||||
chmod 700 ~/.ssh
|
||||
|
||||
# Add your public key (paste the content of seaweedfs_telemetry_deploy.pub)
|
||||
nano ~/.ssh/authorized_keys
|
||||
chmod 600 ~/.ssh/authorized_keys
|
||||
```
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
#### SSH Connection Issues
|
||||
|
||||
```bash
|
||||
# Test SSH connection manually
|
||||
ssh -i ~/.ssh/seaweedfs_telemetry_deploy -v user@your-server.com
|
||||
|
||||
# Check SSH key permissions
|
||||
ls -la ~/.ssh/seaweedfs_telemetry_deploy*
|
||||
# Should show: -rw------- for private key, -rw-r--r-- for public key
|
||||
```
|
||||
|
||||
#### GitHub Actions Fails
|
||||
|
||||
1. **Check secrets**: Ensure all three secrets are properly set in GitHub
|
||||
2. **Verify SSH key**: Make sure the entire private key (including headers/footers) is copied
|
||||
3. **Test connectivity**: Manually SSH to the server from your local machine
|
||||
4. **Check user permissions**: Ensure the remote user has necessary permissions
|
||||
|
||||
## GitHub Actions Workflow
|
||||
|
||||
The deployment workflow (`.github/workflows/deploy_telemetry.yml`) provides two main operations:
|
||||
|
||||
### 1. First-time Setup
|
||||
|
||||
Run this once to set up the remote server:
|
||||
|
||||
1. Go to GitHub Actions in your repository
|
||||
2. Select "Deploy Telemetry Server" workflow
|
||||
3. Click "Run workflow"
|
||||
4. Check "Run first-time server setup"
|
||||
5. Click "Run workflow"
|
||||
|
||||
This will:
|
||||
- Create necessary directories on the remote server
|
||||
- Set up systemd service configuration
|
||||
- Configure log rotation
|
||||
- Upload Grafana dashboard and Prometheus configuration
|
||||
|
||||
|
||||
### 2. Deploy Updates
|
||||
|
||||
Deployments happen automatically when:
|
||||
- Code is pushed to the `master` branch with changes in the `telemetry/` directory
|
||||
|
||||
Or manually trigger deployment:
|
||||
1. Go to GitHub Actions in your repository
|
||||
2. Select "Deploy Telemetry Server" workflow
|
||||
3. Click "Run workflow"
|
||||
4. Check "Deploy telemetry server to remote server"
|
||||
5. Click "Run workflow"
|
||||
|
||||
## Server Directory Structure
|
||||
|
||||
After setup, the remote server will have:
|
||||
|
||||
```
|
||||
~/seaweedfs-telemetry/
|
||||
├── bin/
|
||||
│ └── telemetry-server # Binary executable
|
||||
├── logs/
|
||||
│ ├── telemetry.log # Application logs
|
||||
│ └── telemetry.error.log # Error logs
|
||||
├── data/ # Data directory (if needed)
|
||||
├── grafana-dashboard.json # Grafana dashboard configuration
|
||||
└── prometheus.yml # Prometheus configuration
|
||||
```
|
||||
|
||||
## Service Management
|
||||
|
||||
The telemetry server runs as a systemd service:
|
||||
|
||||
```bash
|
||||
# Check service status
|
||||
sudo systemctl status telemetry.service
|
||||
|
||||
# View logs
|
||||
sudo journalctl -u telemetry.service -f
|
||||
|
||||
# Restart service
|
||||
sudo systemctl restart telemetry.service
|
||||
|
||||
# Stop/start service
|
||||
sudo systemctl stop telemetry.service
|
||||
sudo systemctl start telemetry.service
|
||||
```
|
||||
|
||||
## Accessing the Service
|
||||
|
||||
After deployment, the telemetry server will be available at:
|
||||
|
||||
- **Dashboard**: `http://your-server:8353`
|
||||
- **API**: `http://your-server:8353/api/*`
|
||||
- **Metrics**: `http://your-server:8353/metrics`
|
||||
- **Health Check**: `http://your-server:8353/health`
|
||||
|
||||
## Optional: Prometheus and Grafana Integration
|
||||
|
||||
### Prometheus Setup
|
||||
|
||||
1. Install Prometheus on your server
|
||||
2. Update `/etc/prometheus/prometheus.yml` to include:
|
||||
```yaml
|
||||
scrape_configs:
|
||||
- job_name: 'seaweedfs-telemetry'
|
||||
static_configs:
|
||||
- targets: ['localhost:8353']
|
||||
metrics_path: '/metrics'
|
||||
```
|
||||
|
||||
### Grafana Setup
|
||||
|
||||
1. Install Grafana on your server
|
||||
2. Import the dashboard from `~/seaweedfs-telemetry/grafana-dashboard.json`
|
||||
3. Configure Prometheus as a data source pointing to your Prometheus instance
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Deployment Fails
|
||||
|
||||
1. Check GitHub Actions logs for detailed error messages
|
||||
2. Verify SSH connectivity: `ssh user@host`
|
||||
3. Ensure all required secrets are configured in GitHub
|
||||
|
||||
### Service Won't Start
|
||||
|
||||
1. Check service logs: `sudo journalctl -u telemetry.service`
|
||||
2. Verify binary permissions: `ls -la ~/seaweedfs-telemetry/bin/`
|
||||
3. Test binary manually: `~/seaweedfs-telemetry/bin/telemetry-server -help`
|
||||
|
||||
### Port Conflicts
|
||||
|
||||
If port 8353 is already in use:
|
||||
|
||||
1. Edit the systemd service: `sudo systemctl edit telemetry.service`
|
||||
2. Add override configuration:
|
||||
```ini
|
||||
[Service]
|
||||
ExecStart=
|
||||
ExecStart=/home/user/seaweedfs-telemetry/bin/telemetry-server -port=8354
|
||||
```
|
||||
3. Reload and restart: `sudo systemctl daemon-reload && sudo systemctl restart telemetry.service`
|
||||
|
||||
## Security Considerations
|
||||
|
||||
1. **Firewall**: Consider restricting access to telemetry ports
|
||||
2. **SSH Keys**: Use dedicated SSH keys with minimal permissions
|
||||
3. **User Permissions**: Run the service as a non-privileged user
|
||||
4. **Network**: Consider running on internal networks only
|
||||
|
||||
## Monitoring
|
||||
|
||||
Monitor the deployment and service health:
|
||||
|
||||
- **GitHub Actions**: Check workflow runs for deployment status
|
||||
- **System Logs**: `sudo journalctl -u telemetry.service`
|
||||
- **Application Logs**: `tail -f ~/seaweedfs-telemetry/logs/telemetry.log`
|
||||
- **Health Endpoint**: `curl http://localhost:8353/health`
|
||||
- **Metrics**: `curl http://localhost:8353/metrics`
|
351
telemetry/README.md
Normal file
351
telemetry/README.md
Normal file
@@ -0,0 +1,351 @@
|
||||
# SeaweedFS Telemetry System
|
||||
|
||||
A privacy-respecting telemetry system for SeaweedFS that collects cluster-level usage statistics and provides visualization through Prometheus and Grafana.
|
||||
|
||||
## Features
|
||||
|
||||
- **Privacy-First Design**: Uses in-memory cluster IDs (regenerated on restart), no personal data collection
|
||||
- **Prometheus Integration**: Native Prometheus metrics for monitoring and alerting
|
||||
- **Grafana Dashboards**: Pre-built dashboards for data visualization
|
||||
- **Protocol Buffers**: Efficient binary data transmission for optimal performance
|
||||
- **Opt-in Only**: Disabled by default, requires explicit configuration
|
||||
- **Docker Compose**: Complete monitoring stack deployment
|
||||
- **Automatic Cleanup**: Configurable data retention policies
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
SeaweedFS Cluster → Telemetry Client → Telemetry Server → Prometheus → Grafana
|
||||
(protobuf) (metrics) (queries)
|
||||
```
|
||||
|
||||
## Data Transmission
|
||||
|
||||
The telemetry system uses **Protocol Buffers exclusively** for efficient binary data transmission:
|
||||
|
||||
- **Compact Format**: 30-50% smaller than JSON
|
||||
- **Fast Serialization**: Better performance than text-based formats
|
||||
- **Type Safety**: Strong typing with generated Go structs
|
||||
- **Schema Evolution**: Built-in versioning support
|
||||
|
||||
### Protobuf Schema
|
||||
|
||||
```protobuf
|
||||
message TelemetryData {
|
||||
string cluster_id = 1; // In-memory generated UUID
|
||||
string version = 2; // SeaweedFS version
|
||||
string os = 3; // Operating system
|
||||
repeated string features = 4; // Enabled features
|
||||
string deployment = 5; // Deployment type
|
||||
int32 volume_server_count = 6; // Number of volume servers
|
||||
uint64 total_disk_bytes = 7; // Total disk usage
|
||||
int32 total_volume_count = 8; // Total volume count
|
||||
int64 timestamp = 9; // Collection timestamp
|
||||
}
|
||||
```
|
||||
|
||||
## Privacy Approach
|
||||
|
||||
- **No Personal Data**: No hostnames, IP addresses, or user information
|
||||
- **In-Memory IDs**: Cluster IDs are generated in-memory and change on restart
|
||||
- **Aggregated Data**: Only cluster-level statistics, no individual file/user data
|
||||
- **Opt-in Only**: Telemetry is disabled by default
|
||||
- **Transparent**: Open source implementation, clear data collection policy
|
||||
|
||||
## Collected Data
|
||||
|
||||
| Field | Description | Example |
|
||||
|-------|-------------|---------|
|
||||
| `cluster_id` | In-memory UUID (changes on restart) | `a1b2c3d4-...` |
|
||||
| `version` | SeaweedFS version | `3.45` |
|
||||
| `os` | Operating system and architecture | `linux/amd64` |
|
||||
| `features` | Enabled components | `["filer", "s3api"]` |
|
||||
| `deployment` | Deployment type | `cluster` |
|
||||
| `volume_server_count` | Number of volume servers | `5` |
|
||||
| `total_disk_bytes` | Total disk usage across cluster | `1073741824` |
|
||||
| `total_volume_count` | Total number of volumes | `120` |
|
||||
| `timestamp` | When data was collected | `1640995200` |
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Deploy Telemetry Server
|
||||
|
||||
```bash
|
||||
# Clone and start the complete monitoring stack
|
||||
git clone https://github.com/seaweedfs/seaweedfs.git
|
||||
cd seaweedfs/telemetry
|
||||
docker-compose up -d
|
||||
|
||||
# Or run the server directly
|
||||
cd server
|
||||
go run . -port=8080 -dashboard=true
|
||||
```
|
||||
|
||||
### 2. Configure SeaweedFS
|
||||
|
||||
```bash
|
||||
# Enable telemetry in SeaweedFS master (uses default telemetry.seaweedfs.com:3091)
|
||||
weed master -telemetry=true
|
||||
|
||||
# Or in server mode
|
||||
weed server -telemetry=true
|
||||
|
||||
# Or specify custom telemetry server
|
||||
weed master -telemetry=true -telemetry.url=http://localhost:8080/api/collect
|
||||
```
|
||||
|
||||
### 3. Access Dashboards
|
||||
|
||||
- **Telemetry Server**: http://localhost:8080
|
||||
- **Prometheus**: http://localhost:9090
|
||||
- **Grafana**: http://localhost:3000 (admin/admin)
|
||||
|
||||
## Configuration
|
||||
|
||||
### SeaweedFS Master/Server
|
||||
|
||||
```bash
|
||||
# Enable telemetry
|
||||
-telemetry=true
|
||||
|
||||
# Set custom telemetry server URL (optional, defaults to telemetry.seaweedfs.com:3091)
|
||||
-telemetry.url=http://your-telemetry-server:8080/api/collect
|
||||
```
|
||||
|
||||
### Telemetry Server
|
||||
|
||||
```bash
|
||||
# Server configuration
|
||||
-port=8080 # Server port
|
||||
-dashboard=true # Enable built-in dashboard
|
||||
-cleanup=24h # Cleanup interval
|
||||
-max-age=720h # Maximum data retention (30 days)
|
||||
|
||||
# Example
|
||||
./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h
|
||||
```
|
||||
|
||||
## Prometheus Metrics
|
||||
|
||||
The telemetry server exposes these Prometheus metrics:
|
||||
|
||||
### Cluster Metrics
|
||||
- `seaweedfs_telemetry_total_clusters`: Total unique clusters (30 days)
|
||||
- `seaweedfs_telemetry_active_clusters`: Active clusters (7 days)
|
||||
|
||||
### Per-Cluster Metrics
|
||||
- `seaweedfs_telemetry_volume_servers{cluster_id, version, os, deployment}`: Volume servers per cluster
|
||||
- `seaweedfs_telemetry_disk_bytes{cluster_id, version, os, deployment}`: Disk usage per cluster
|
||||
- `seaweedfs_telemetry_volume_count{cluster_id, version, os, deployment}`: Volume count per cluster
|
||||
- `seaweedfs_telemetry_filer_count{cluster_id, version, os, deployment}`: Filer servers per cluster
|
||||
- `seaweedfs_telemetry_broker_count{cluster_id, version, os, deployment}`: Broker servers per cluster
|
||||
- `seaweedfs_telemetry_cluster_info{cluster_id, version, os, deployment, features}`: Cluster metadata
|
||||
|
||||
### Server Metrics
|
||||
- `seaweedfs_telemetry_reports_received_total`: Total telemetry reports received
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### Data Collection
|
||||
```bash
|
||||
# Submit telemetry data (protobuf only)
|
||||
POST /api/collect
|
||||
Content-Type: application/x-protobuf
|
||||
[TelemetryRequest protobuf data]
|
||||
```
|
||||
|
||||
### Statistics (JSON for dashboard/debugging)
|
||||
```bash
|
||||
# Get aggregated statistics
|
||||
GET /api/stats
|
||||
|
||||
# Get recent cluster instances
|
||||
GET /api/instances?limit=100
|
||||
|
||||
# Get metrics over time
|
||||
GET /api/metrics?days=30
|
||||
```
|
||||
|
||||
### Monitoring
|
||||
```bash
|
||||
# Prometheus metrics
|
||||
GET /metrics
|
||||
```
|
||||
|
||||
## Docker Deployment
|
||||
|
||||
### Complete Stack (Recommended)
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
version: '3.8'
|
||||
services:
|
||||
telemetry-server:
|
||||
build: ./server
|
||||
ports:
|
||||
- "8080:8080"
|
||||
command: ["-port=8080", "-dashboard=true", "-cleanup=24h"]
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
ports:
|
||||
- "3000:3000"
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||
volumes:
|
||||
- ./grafana-provisioning:/etc/grafana/provisioning
|
||||
- ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs.json
|
||||
```
|
||||
|
||||
```bash
|
||||
# Deploy the stack
|
||||
docker-compose up -d
|
||||
|
||||
# Scale telemetry server if needed
|
||||
docker-compose up -d --scale telemetry-server=3
|
||||
```
|
||||
|
||||
### Server Only
|
||||
|
||||
```bash
|
||||
# Build and run telemetry server
|
||||
cd server
|
||||
docker build -t seaweedfs-telemetry .
|
||||
docker run -p 8080:8080 seaweedfs-telemetry -port=8080 -dashboard=true
|
||||
```
|
||||
|
||||
## Development
|
||||
|
||||
### Protocol Buffer Development
|
||||
|
||||
```bash
|
||||
# Generate protobuf code
|
||||
cd telemetry
|
||||
protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto
|
||||
|
||||
# The generated code is already included in the repository
|
||||
```
|
||||
|
||||
### Build from Source
|
||||
|
||||
```bash
|
||||
# Build telemetry server
|
||||
cd telemetry/server
|
||||
go build -o telemetry-server .
|
||||
|
||||
# Build SeaweedFS with telemetry support
|
||||
cd ../..
|
||||
go build -o weed ./weed
|
||||
```
|
||||
|
||||
### Testing
|
||||
|
||||
```bash
|
||||
# Test telemetry server
|
||||
cd telemetry/server
|
||||
go test ./...
|
||||
|
||||
# Test protobuf communication (requires protobuf tools)
|
||||
# See telemetry client code for examples
|
||||
```
|
||||
|
||||
## Grafana Dashboard
|
||||
|
||||
The included Grafana dashboard provides:
|
||||
|
||||
- **Overview**: Total and active clusters, version distribution
|
||||
- **Resource Usage**: Volume servers and disk usage over time
|
||||
- **Deployments**: Deployment type and OS distribution
|
||||
- **Growth Trends**: Historical growth patterns
|
||||
|
||||
### Custom Queries
|
||||
|
||||
```promql
|
||||
# Total active clusters
|
||||
seaweedfs_telemetry_active_clusters
|
||||
|
||||
# Disk usage by version
|
||||
sum by (version) (seaweedfs_telemetry_disk_bytes)
|
||||
|
||||
# Volume servers by deployment type
|
||||
sum by (deployment) (seaweedfs_telemetry_volume_servers)
|
||||
|
||||
# Filer servers by version
|
||||
sum by (version) (seaweedfs_telemetry_filer_count)
|
||||
|
||||
# Broker servers across all clusters
|
||||
sum(seaweedfs_telemetry_broker_count)
|
||||
|
||||
# Growth rate (weekly)
|
||||
increase(seaweedfs_telemetry_total_clusters[7d])
|
||||
```
|
||||
|
||||
## Security Considerations
|
||||
|
||||
- **Network Security**: Use HTTPS in production environments
|
||||
- **Access Control**: Implement authentication for Grafana and Prometheus
|
||||
- **Data Retention**: Configure appropriate retention policies
|
||||
- **Monitoring**: Monitor the telemetry infrastructure itself
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
**SeaweedFS not sending data:**
|
||||
```bash
|
||||
# Check telemetry configuration
|
||||
weed master -h | grep telemetry
|
||||
|
||||
# Verify connectivity
|
||||
curl -v http://your-telemetry-server:8080/api/collect
|
||||
```
|
||||
|
||||
**Server not receiving data:**
|
||||
```bash
|
||||
# Check server logs
|
||||
docker-compose logs telemetry-server
|
||||
|
||||
# Verify metrics endpoint
|
||||
curl http://localhost:8080/metrics
|
||||
```
|
||||
|
||||
**Prometheus not scraping:**
|
||||
```bash
|
||||
# Check Prometheus targets
|
||||
curl http://localhost:9090/api/v1/targets
|
||||
|
||||
# Verify configuration
|
||||
docker-compose logs prometheus
|
||||
```
|
||||
|
||||
### Debugging
|
||||
|
||||
```bash
|
||||
# Enable verbose logging in SeaweedFS
|
||||
weed master -v=2 -telemetry=true
|
||||
|
||||
# Check telemetry server metrics
|
||||
curl http://localhost:8080/metrics | grep seaweedfs_telemetry
|
||||
|
||||
# Test data flow
|
||||
curl http://localhost:8080/api/stats
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Make your changes
|
||||
4. Add tests if applicable
|
||||
5. Submit a pull request
|
||||
|
||||
## License
|
||||
|
||||
This telemetry system is part of SeaweedFS and follows the same Apache 2.0 license.
|
55
telemetry/docker-compose.yml
Normal file
55
telemetry/docker-compose.yml
Normal file
@@ -0,0 +1,55 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
telemetry-server:
|
||||
build: ./server
|
||||
ports:
|
||||
- "8080:8080"
|
||||
command: [
|
||||
"./telemetry-server",
|
||||
"-port=8080",
|
||||
"-dashboard=false", # Disable built-in dashboard, use Grafana
|
||||
"-log=true",
|
||||
"-cors=true"
|
||||
]
|
||||
networks:
|
||||
- telemetry
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
- prometheus_data:/prometheus
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yml'
|
||||
- '--storage.tsdb.path=/prometheus'
|
||||
- '--web.console.libraries=/etc/prometheus/console_libraries'
|
||||
- '--web.console.templates=/etc/prometheus/consoles'
|
||||
- '--storage.tsdb.retention.time=200h'
|
||||
- '--web.enable-lifecycle'
|
||||
networks:
|
||||
- telemetry
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
ports:
|
||||
- "3000:3000"
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||
- GF_USERS_ALLOW_SIGN_UP=false
|
||||
volumes:
|
||||
- grafana_data:/var/lib/grafana
|
||||
- ./grafana-dashboard.json:/var/lib/grafana/dashboards/seaweedfs-telemetry.json
|
||||
- ./grafana-provisioning:/etc/grafana/provisioning
|
||||
networks:
|
||||
- telemetry
|
||||
|
||||
volumes:
|
||||
prometheus_data:
|
||||
grafana_data:
|
||||
|
||||
networks:
|
||||
telemetry:
|
||||
driver: bridge
|
734
telemetry/grafana-dashboard.json
Normal file
734
telemetry/grafana-dashboard.json
Normal file
@@ -0,0 +1,734 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "seaweedfs_telemetry_total_clusters",
|
||||
"format": "time_series",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total SeaweedFS Clusters",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"align": "auto",
|
||||
"cellOptions": {
|
||||
"type": "auto"
|
||||
},
|
||||
"inspect": false
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"showHeader": true
|
||||
},
|
||||
"pluginVersion": "10.0.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "seaweedfs_telemetry_active_clusters",
|
||||
"format": "time_series",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Active Clusters (7 days)",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
}
|
||||
},
|
||||
"mappings": []
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "visible",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"pieType": "pie",
|
||||
"reduceOptions": {
|
||||
"values": false,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": ""
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "count by (version) (seaweedfs_telemetry_cluster_info)",
|
||||
"format": "time_series",
|
||||
"legendFormat": "{{version}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "SeaweedFS Version Distribution",
|
||||
"type": "piechart"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
}
|
||||
},
|
||||
"mappings": []
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "visible",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"pieType": "pie",
|
||||
"reduceOptions": {
|
||||
"values": false,
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": ""
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "count by (os) (seaweedfs_telemetry_cluster_info)",
|
||||
"format": "time_series",
|
||||
"legendFormat": "{{os}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Operating System Distribution",
|
||||
"type": "piechart"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "sum(seaweedfs_telemetry_volume_servers)",
|
||||
"format": "time_series",
|
||||
"legendFormat": "Total Volume Servers",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Volume Servers Over Time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 24
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "sum(seaweedfs_telemetry_disk_bytes)",
|
||||
"format": "time_series",
|
||||
"legendFormat": "Total Disk Usage",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Disk Usage Over Time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 24
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "sum(seaweedfs_telemetry_volume_count)",
|
||||
"format": "time_series",
|
||||
"legendFormat": "Total Volume Count",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Volume Count Over Time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 32
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "sum(seaweedfs_telemetry_filer_count)",
|
||||
"format": "time_series",
|
||||
"legendFormat": "Total Filer Count",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Filer Servers Over Time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 32
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"expr": "sum(seaweedfs_telemetry_broker_count)",
|
||||
"format": "time_series",
|
||||
"legendFormat": "Total Broker Count",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Total Broker Servers Over Time",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "5m",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"seaweedfs",
|
||||
"telemetry"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "SeaweedFS Telemetry Dashboard",
|
||||
"uid": "seaweedfs-telemetry",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
12
telemetry/grafana-provisioning/dashboards/dashboards.yml
Normal file
12
telemetry/grafana-provisioning/dashboards/dashboards.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: 'seaweedfs'
|
||||
orgId: 1
|
||||
folder: ''
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
@@ -0,0 +1,9 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
editable: true
|
15
telemetry/prometheus.yml
Normal file
15
telemetry/prometheus.yml
Normal file
@@ -0,0 +1,15 @@
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
rule_files:
|
||||
# - "first_rules.yml"
|
||||
# - "second_rules.yml"
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'seaweedfs-telemetry'
|
||||
static_configs:
|
||||
- targets: ['telemetry-server:8080']
|
||||
scrape_interval: 30s
|
||||
metrics_path: '/metrics'
|
||||
scrape_timeout: 10s
|
398
telemetry/proto/telemetry.pb.go
Normal file
398
telemetry/proto/telemetry.pb.go
Normal file
@@ -0,0 +1,398 @@
|
||||
// Code generated by protoc-gen-go. DO NOT EDIT.
|
||||
// versions:
|
||||
// protoc-gen-go v1.34.2
|
||||
// protoc v5.29.3
|
||||
// source: proto/telemetry.proto
|
||||
|
||||
package proto
|
||||
|
||||
import (
|
||||
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
|
||||
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
|
||||
reflect "reflect"
|
||||
sync "sync"
|
||||
)
|
||||
|
||||
const (
|
||||
// Verify that this generated code is sufficiently up-to-date.
|
||||
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
|
||||
// Verify that runtime/protoimpl is sufficiently up-to-date.
|
||||
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
|
||||
)
|
||||
|
||||
// TelemetryData represents cluster-level telemetry information
|
||||
type TelemetryData struct {
|
||||
state protoimpl.MessageState
|
||||
sizeCache protoimpl.SizeCache
|
||||
unknownFields protoimpl.UnknownFields
|
||||
|
||||
// Unique cluster identifier (generated in-memory)
|
||||
ClusterId string `protobuf:"bytes,1,opt,name=cluster_id,json=clusterId,proto3" json:"cluster_id,omitempty"`
|
||||
// SeaweedFS version
|
||||
Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"`
|
||||
// Operating system (e.g., "linux/amd64")
|
||||
Os string `protobuf:"bytes,3,opt,name=os,proto3" json:"os,omitempty"`
|
||||
// Enabled features (e.g., ["filer", "s3api", "mq"])
|
||||
Features []string `protobuf:"bytes,4,rep,name=features,proto3" json:"features,omitempty"`
|
||||
// Deployment type ("standalone", "cluster", "master-only", "volume-only")
|
||||
Deployment string `protobuf:"bytes,5,opt,name=deployment,proto3" json:"deployment,omitempty"`
|
||||
// Number of volume servers in the cluster
|
||||
VolumeServerCount int32 `protobuf:"varint,6,opt,name=volume_server_count,json=volumeServerCount,proto3" json:"volume_server_count,omitempty"`
|
||||
// Total disk usage across all volume servers (in bytes)
|
||||
TotalDiskBytes uint64 `protobuf:"varint,7,opt,name=total_disk_bytes,json=totalDiskBytes,proto3" json:"total_disk_bytes,omitempty"`
|
||||
// Total number of volumes in the cluster
|
||||
TotalVolumeCount int32 `protobuf:"varint,8,opt,name=total_volume_count,json=totalVolumeCount,proto3" json:"total_volume_count,omitempty"`
|
||||
// Number of filer servers in the cluster
|
||||
FilerCount int32 `protobuf:"varint,9,opt,name=filer_count,json=filerCount,proto3" json:"filer_count,omitempty"`
|
||||
// Number of broker servers in the cluster
|
||||
BrokerCount int32 `protobuf:"varint,10,opt,name=broker_count,json=brokerCount,proto3" json:"broker_count,omitempty"`
|
||||
// Unix timestamp when the data was collected
|
||||
Timestamp int64 `protobuf:"varint,11,opt,name=timestamp,proto3" json:"timestamp,omitempty"`
|
||||
}
|
||||
|
||||
func (x *TelemetryData) Reset() {
|
||||
*x = TelemetryData{}
|
||||
if protoimpl.UnsafeEnabled {
|
||||
mi := &file_proto_telemetry_proto_msgTypes[0]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
}
|
||||
|
||||
func (x *TelemetryData) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*TelemetryData) ProtoMessage() {}
|
||||
|
||||
func (x *TelemetryData) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_proto_telemetry_proto_msgTypes[0]
|
||||
if protoimpl.UnsafeEnabled && x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use TelemetryData.ProtoReflect.Descriptor instead.
|
||||
func (*TelemetryData) Descriptor() ([]byte, []int) {
|
||||
return file_proto_telemetry_proto_rawDescGZIP(), []int{0}
|
||||
}
|
||||
|
||||
func (x *TelemetryData) GetClusterId() string {
|
||||
if x != nil {
|
||||
return x.ClusterId
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *TelemetryData) GetVersion() string {
|
||||
if x != nil {
|
||||
return x.Version
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *TelemetryData) GetOs() string {
|
||||
if x != nil {
|
||||
return x.Os
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *TelemetryData) GetFeatures() []string {
|
||||
if x != nil {
|
||||
return x.Features
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *TelemetryData) GetDeployment() string {
|
||||
if x != nil {
|
||||
return x.Deployment
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (x *TelemetryData) GetVolumeServerCount() int32 {
|
||||
if x != nil {
|
||||
return x.VolumeServerCount
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *TelemetryData) GetTotalDiskBytes() uint64 {
|
||||
if x != nil {
|
||||
return x.TotalDiskBytes
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *TelemetryData) GetTotalVolumeCount() int32 {
|
||||
if x != nil {
|
||||
return x.TotalVolumeCount
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *TelemetryData) GetFilerCount() int32 {
|
||||
if x != nil {
|
||||
return x.FilerCount
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *TelemetryData) GetBrokerCount() int32 {
|
||||
if x != nil {
|
||||
return x.BrokerCount
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (x *TelemetryData) GetTimestamp() int64 {
|
||||
if x != nil {
|
||||
return x.Timestamp
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// TelemetryRequest is sent from SeaweedFS clusters to the telemetry server
|
||||
type TelemetryRequest struct {
|
||||
state protoimpl.MessageState
|
||||
sizeCache protoimpl.SizeCache
|
||||
unknownFields protoimpl.UnknownFields
|
||||
|
||||
Data *TelemetryData `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"`
|
||||
}
|
||||
|
||||
func (x *TelemetryRequest) Reset() {
|
||||
*x = TelemetryRequest{}
|
||||
if protoimpl.UnsafeEnabled {
|
||||
mi := &file_proto_telemetry_proto_msgTypes[1]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
}
|
||||
|
||||
func (x *TelemetryRequest) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*TelemetryRequest) ProtoMessage() {}
|
||||
|
||||
func (x *TelemetryRequest) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_proto_telemetry_proto_msgTypes[1]
|
||||
if protoimpl.UnsafeEnabled && x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use TelemetryRequest.ProtoReflect.Descriptor instead.
|
||||
func (*TelemetryRequest) Descriptor() ([]byte, []int) {
|
||||
return file_proto_telemetry_proto_rawDescGZIP(), []int{1}
|
||||
}
|
||||
|
||||
func (x *TelemetryRequest) GetData() *TelemetryData {
|
||||
if x != nil {
|
||||
return x.Data
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TelemetryResponse is returned by the telemetry server
|
||||
type TelemetryResponse struct {
|
||||
state protoimpl.MessageState
|
||||
sizeCache protoimpl.SizeCache
|
||||
unknownFields protoimpl.UnknownFields
|
||||
|
||||
Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
|
||||
Message string `protobuf:"bytes,2,opt,name=message,proto3" json:"message,omitempty"`
|
||||
}
|
||||
|
||||
func (x *TelemetryResponse) Reset() {
|
||||
*x = TelemetryResponse{}
|
||||
if protoimpl.UnsafeEnabled {
|
||||
mi := &file_proto_telemetry_proto_msgTypes[2]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
}
|
||||
|
||||
func (x *TelemetryResponse) String() string {
|
||||
return protoimpl.X.MessageStringOf(x)
|
||||
}
|
||||
|
||||
func (*TelemetryResponse) ProtoMessage() {}
|
||||
|
||||
func (x *TelemetryResponse) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_proto_telemetry_proto_msgTypes[2]
|
||||
if protoimpl.UnsafeEnabled && x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
return ms
|
||||
}
|
||||
return mi.MessageOf(x)
|
||||
}
|
||||
|
||||
// Deprecated: Use TelemetryResponse.ProtoReflect.Descriptor instead.
|
||||
func (*TelemetryResponse) Descriptor() ([]byte, []int) {
|
||||
return file_proto_telemetry_proto_rawDescGZIP(), []int{2}
|
||||
}
|
||||
|
||||
func (x *TelemetryResponse) GetSuccess() bool {
|
||||
if x != nil {
|
||||
return x.Success
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (x *TelemetryResponse) GetMessage() string {
|
||||
if x != nil {
|
||||
return x.Message
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
var File_proto_telemetry_proto protoreflect.FileDescriptor
|
||||
|
||||
var file_proto_telemetry_proto_rawDesc = []byte{
|
||||
0x0a, 0x15, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72,
|
||||
0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x09, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74,
|
||||
0x72, 0x79, 0x22, 0xfe, 0x02, 0x0a, 0x0d, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79,
|
||||
0x44, 0x61, 0x74, 0x61, 0x12, 0x1d, 0x0a, 0x0a, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65, 0x72, 0x5f,
|
||||
0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6c, 0x75, 0x73, 0x74, 0x65,
|
||||
0x72, 0x49, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02,
|
||||
0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x0e, 0x0a,
|
||||
0x02, 0x6f, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x6f, 0x73, 0x12, 0x1a, 0x0a,
|
||||
0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x09, 0x52,
|
||||
0x08, 0x66, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x73, 0x12, 0x1e, 0x0a, 0x0a, 0x64, 0x65, 0x70,
|
||||
0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x64,
|
||||
0x65, 0x70, 0x6c, 0x6f, 0x79, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x2e, 0x0a, 0x13, 0x76, 0x6f, 0x6c,
|
||||
0x75, 0x6d, 0x65, 0x5f, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74,
|
||||
0x18, 0x06, 0x20, 0x01, 0x28, 0x05, 0x52, 0x11, 0x76, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x53, 0x65,
|
||||
0x72, 0x76, 0x65, 0x72, 0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x28, 0x0a, 0x10, 0x74, 0x6f, 0x74,
|
||||
0x61, 0x6c, 0x5f, 0x64, 0x69, 0x73, 0x6b, 0x5f, 0x62, 0x79, 0x74, 0x65, 0x73, 0x18, 0x07, 0x20,
|
||||
0x01, 0x28, 0x04, 0x52, 0x0e, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x44, 0x69, 0x73, 0x6b, 0x42, 0x79,
|
||||
0x74, 0x65, 0x73, 0x12, 0x2c, 0x0a, 0x12, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x76, 0x6f, 0x6c,
|
||||
0x75, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x08, 0x20, 0x01, 0x28, 0x05, 0x52,
|
||||
0x10, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x56, 0x6f, 0x6c, 0x75, 0x6d, 0x65, 0x43, 0x6f, 0x75, 0x6e,
|
||||
0x74, 0x12, 0x1f, 0x0a, 0x0b, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75, 0x6e, 0x74,
|
||||
0x18, 0x09, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x72, 0x43, 0x6f, 0x75,
|
||||
0x6e, 0x74, 0x12, 0x21, 0x0a, 0x0c, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72, 0x5f, 0x63, 0x6f, 0x75,
|
||||
0x6e, 0x74, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x62, 0x72, 0x6f, 0x6b, 0x65, 0x72,
|
||||
0x43, 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61,
|
||||
0x6d, 0x70, 0x18, 0x0b, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74,
|
||||
0x61, 0x6d, 0x70, 0x22, 0x40, 0x0a, 0x10, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79,
|
||||
0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x2c, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18,
|
||||
0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72,
|
||||
0x79, 0x2e, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x44, 0x61, 0x74, 0x61, 0x52,
|
||||
0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x47, 0x0a, 0x11, 0x54, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74,
|
||||
0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x73, 0x75,
|
||||
0x63, 0x63, 0x65, 0x73, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x73, 0x75, 0x63,
|
||||
0x63, 0x65, 0x73, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18,
|
||||
0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x42, 0x30,
|
||||
0x5a, 0x2e, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x73, 0x65, 0x61,
|
||||
0x77, 0x65, 0x65, 0x64, 0x66, 0x73, 0x2f, 0x73, 0x65, 0x61, 0x77, 0x65, 0x65, 0x64, 0x66, 0x73,
|
||||
0x2f, 0x74, 0x65, 0x6c, 0x65, 0x6d, 0x65, 0x74, 0x72, 0x79, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f,
|
||||
0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
|
||||
}
|
||||
|
||||
var (
|
||||
file_proto_telemetry_proto_rawDescOnce sync.Once
|
||||
file_proto_telemetry_proto_rawDescData = file_proto_telemetry_proto_rawDesc
|
||||
)
|
||||
|
||||
func file_proto_telemetry_proto_rawDescGZIP() []byte {
|
||||
file_proto_telemetry_proto_rawDescOnce.Do(func() {
|
||||
file_proto_telemetry_proto_rawDescData = protoimpl.X.CompressGZIP(file_proto_telemetry_proto_rawDescData)
|
||||
})
|
||||
return file_proto_telemetry_proto_rawDescData
|
||||
}
|
||||
|
||||
// Generated protobuf runtime plumbing for proto/telemetry.proto.
// This wiring is produced by protoc-gen-go; regenerate with protoc rather
// than editing by hand.

// One MessageInfo slot per message declared in telemetry.proto.
var file_proto_telemetry_proto_msgTypes = make([]protoimpl.MessageInfo, 3)

// Go types backing each message, indexed by message number.
var file_proto_telemetry_proto_goTypes = []any{
	(*TelemetryData)(nil),     // 0: telemetry.TelemetryData
	(*TelemetryRequest)(nil),  // 1: telemetry.TelemetryRequest
	(*TelemetryResponse)(nil), // 2: telemetry.TelemetryResponse
}

// Dependency index table: maps field/method references onto goTypes entries.
var file_proto_telemetry_proto_depIdxs = []int32{
	0, // 0: telemetry.TelemetryRequest.data:type_name -> telemetry.TelemetryData
	1, // [1:1] is the sub-list for method output_type
	1, // [1:1] is the sub-list for method input_type
	1, // [1:1] is the sub-list for extension type_name
	1, // [1:1] is the sub-list for extension extendee
	0, // [0:1] is the sub-list for field type_name
}

func init() { file_proto_telemetry_proto_init() }

// file_proto_telemetry_proto_init registers the file descriptor and message
// types with the protobuf runtime. It is idempotent: a second call returns
// immediately once File_proto_telemetry_proto has been built.
func file_proto_telemetry_proto_init() {
	if File_proto_telemetry_proto != nil {
		return
	}
	// Without unsafe access, the runtime needs exporters to reach the
	// unexported state/sizeCache/unknownFields of each message struct.
	if !protoimpl.UnsafeEnabled {
		file_proto_telemetry_proto_msgTypes[0].Exporter = func(v any, i int) any {
			switch v := v.(*TelemetryData); i {
			case 0:
				return &v.state
			case 1:
				return &v.sizeCache
			case 2:
				return &v.unknownFields
			default:
				return nil
			}
		}
		file_proto_telemetry_proto_msgTypes[1].Exporter = func(v any, i int) any {
			switch v := v.(*TelemetryRequest); i {
			case 0:
				return &v.state
			case 1:
				return &v.sizeCache
			case 2:
				return &v.unknownFields
			default:
				return nil
			}
		}
		file_proto_telemetry_proto_msgTypes[2].Exporter = func(v any, i int) any {
			switch v := v.(*TelemetryResponse); i {
			case 0:
				return &v.state
			case 1:
				return &v.sizeCache
			case 2:
				return &v.unknownFields
			default:
				return nil
			}
		}
	}
	type x struct{}
	out := protoimpl.TypeBuilder{
		File: protoimpl.DescBuilder{
			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
			RawDescriptor: file_proto_telemetry_proto_rawDesc,
			NumEnums:      0,
			NumMessages:   3,
			NumExtensions: 0,
			NumServices:   0,
		},
		GoTypes:           file_proto_telemetry_proto_goTypes,
		DependencyIndexes: file_proto_telemetry_proto_depIdxs,
		MessageInfos:      file_proto_telemetry_proto_msgTypes,
	}.Build()
	File_proto_telemetry_proto = out.File
	// Release build-time tables; the runtime keeps what it needs.
	file_proto_telemetry_proto_rawDesc = nil
	file_proto_telemetry_proto_goTypes = nil
	file_proto_telemetry_proto_depIdxs = nil
}
|
52
telemetry/proto/telemetry.proto
Normal file
52
telemetry/proto/telemetry.proto
Normal file
@@ -0,0 +1,52 @@
|
||||
syntax = "proto3";

package telemetry;

option go_package = "github.com/seaweedfs/seaweedfs/telemetry/proto";

// TelemetryData represents cluster-level telemetry information.
// Field numbers are part of the wire format: never reuse or renumber them;
// only append new fields with fresh numbers.
message TelemetryData {
  // Unique cluster identifier (generated in-memory)
  string cluster_id = 1;

  // SeaweedFS version
  string version = 2;

  // Operating system (e.g., "linux/amd64")
  string os = 3;

  // Enabled features (e.g., ["filer", "s3api", "mq"])
  repeated string features = 4;

  // Deployment type ("standalone", "cluster", "master-only", "volume-only")
  string deployment = 5;

  // Number of volume servers in the cluster
  int32 volume_server_count = 6;

  // Total disk usage across all volume servers (in bytes)
  uint64 total_disk_bytes = 7;

  // Total number of volumes in the cluster
  int32 total_volume_count = 8;

  // Number of filer servers in the cluster
  int32 filer_count = 9;

  // Number of broker servers in the cluster
  int32 broker_count = 10;

  // Unix timestamp when the data was collected
  int64 timestamp = 11;
}

// TelemetryRequest is sent from SeaweedFS clusters to the telemetry server
message TelemetryRequest {
  TelemetryData data = 1;
}

// TelemetryResponse is returned by the telemetry server
message TelemetryResponse {
  bool success = 1;
  string message = 2;
}
|
18
telemetry/server/Dockerfile
Normal file
18
telemetry/server/Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
||||
FROM golang:1.21-alpine AS builder
|
||||
|
||||
WORKDIR /app
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags '-extldflags "-static"' -o telemetry-server .
|
||||
|
||||
FROM alpine:latest
|
||||
RUN apk --no-cache add ca-certificates
|
||||
WORKDIR /root/
|
||||
|
||||
COPY --from=builder /app/telemetry-server .
|
||||
|
||||
EXPOSE 8080
|
||||
|
||||
CMD ["./telemetry-server"]
|
97
telemetry/server/Makefile
Normal file
97
telemetry/server/Makefile
Normal file
@@ -0,0 +1,97 @@
|
||||
.PHONY: build run clean test deps proto integration-test test-all
|
||||
|
||||
# Build the telemetry server
|
||||
build:
|
||||
go build -o telemetry-server .
|
||||
|
||||
# Run the server in development mode
|
||||
run:
|
||||
go run . -port=8080 -dashboard=true -cleanup=1h -max-age=24h
|
||||
|
||||
# Run the server in production mode
|
||||
run-prod:
|
||||
./telemetry-server -port=8080 -dashboard=true -cleanup=24h -max-age=720h
|
||||
|
||||
# Clean build artifacts
|
||||
clean:
|
||||
rm -f telemetry-server
|
||||
rm -f ../test/telemetry-server-test.log
|
||||
go clean
|
||||
|
||||
# Run unit tests
|
||||
test:
|
||||
go test ./...
|
||||
|
||||
# Run integration tests
|
||||
integration-test:
|
||||
@echo "🧪 Running telemetry integration tests..."
|
||||
cd ../../ && go run telemetry/test/integration.go
|
||||
|
||||
# Run all tests (unit + integration)
|
||||
test-all: test integration-test
|
||||
|
||||
# Install dependencies
|
||||
deps:
|
||||
go mod download
|
||||
go mod tidy
|
||||
|
||||
# Generate protobuf code (requires protoc)
|
||||
proto:
|
||||
cd .. && protoc --go_out=. --go_opt=paths=source_relative proto/telemetry.proto
|
||||
|
||||
# Build Docker image
|
||||
docker-build:
|
||||
docker build -t seaweedfs-telemetry .
|
||||
|
||||
# Run with Docker
|
||||
docker-run:
|
||||
docker run -p 8080:8080 seaweedfs-telemetry -port=8080 -dashboard=true
|
||||
|
||||
# Development with auto-reload (requires air: go install github.com/cosmtrek/air@latest)
|
||||
dev:
|
||||
air
|
||||
|
||||
# Check if protoc is available
|
||||
check-protoc:
|
||||
@which protoc > /dev/null || (echo "protoc is required for proto generation. Install from https://grpc.io/docs/protoc-installation/" && exit 1)
|
||||
|
||||
# Full development setup
|
||||
setup: check-protoc deps proto build
|
||||
|
||||
# Run a quick smoke test
|
||||
smoke-test: build
|
||||
@echo "🔥 Running smoke test..."
|
||||
@timeout 10s ./telemetry-server -port=18081 > /dev/null 2>&1 & \
|
||||
SERVER_PID=$$!; \
|
||||
sleep 2; \
|
||||
if curl -s http://localhost:18081/health > /dev/null; then \
|
||||
echo "✅ Smoke test passed - server responds to health check"; \
|
||||
else \
|
||||
echo "❌ Smoke test failed - server not responding"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
kill $$SERVER_PID 2>/dev/null || true
|
||||
|
||||
# Continuous integration target
|
||||
ci: deps proto build test integration-test
|
||||
@echo "🎉 All CI tests passed!"
|
||||
|
||||
# Help
|
||||
help:
|
||||
@echo "Available targets:"
|
||||
@echo " build - Build the telemetry server binary"
|
||||
@echo " run - Run server in development mode"
|
||||
@echo " run-prod - Run server in production mode"
|
||||
@echo " clean - Clean build artifacts"
|
||||
@echo " test - Run unit tests"
|
||||
@echo " integration-test- Run integration tests"
|
||||
@echo " test-all - Run all tests (unit + integration)"
|
||||
@echo " deps - Install Go dependencies"
|
||||
@echo " proto - Generate protobuf code"
|
||||
@echo " docker-build - Build Docker image"
|
||||
@echo " docker-run - Run with Docker"
|
||||
@echo " dev - Run with auto-reload (requires air)"
|
||||
@echo " smoke-test - Quick server health check"
|
||||
@echo " setup - Full development setup"
|
||||
@echo " ci - Continuous integration (all tests)"
|
||||
@echo " help - Show this help"
|
152
telemetry/server/api/handlers.go
Normal file
152
telemetry/server/api/handlers.go
Normal file
@@ -0,0 +1,152 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/telemetry/proto"
|
||||
"github.com/seaweedfs/seaweedfs/telemetry/server/storage"
|
||||
protobuf "google.golang.org/protobuf/proto"
|
||||
)
|
||||
|
||||
// Handler serves the telemetry HTTP API endpoints, backed by the
// Prometheus-based storage layer.
type Handler struct {
	storage *storage.PrometheusStorage
}

// NewHandler returns a Handler that reads from and writes to the given storage.
func NewHandler(storage *storage.PrometheusStorage) *Handler {
	return &Handler{storage: storage}
}
|
||||
|
||||
func (h *Handler) CollectTelemetry(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodPost {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
contentType := r.Header.Get("Content-Type")
|
||||
|
||||
// Only accept protobuf content type
|
||||
if contentType != "application/x-protobuf" && contentType != "application/protobuf" {
|
||||
http.Error(w, "Content-Type must be application/x-protobuf", http.StatusUnsupportedMediaType)
|
||||
return
|
||||
}
|
||||
|
||||
// Read protobuf request
|
||||
body, err := io.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to read request body", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
req := &proto.TelemetryRequest{}
|
||||
if err := protobuf.Unmarshal(body, req); err != nil {
|
||||
http.Error(w, "Invalid protobuf data", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
data := req.Data
|
||||
if data == nil {
|
||||
http.Error(w, "Missing telemetry data", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Validate required fields
|
||||
if data.ClusterId == "" || data.Version == "" || data.Os == "" {
|
||||
http.Error(w, "Missing required fields", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
// Set timestamp if not provided
|
||||
if data.Timestamp == 0 {
|
||||
data.Timestamp = time.Now().Unix()
|
||||
}
|
||||
|
||||
// Store the telemetry data
|
||||
if err := h.storage.StoreTelemetry(data); err != nil {
|
||||
http.Error(w, "Failed to store data", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
// Return protobuf response
|
||||
resp := &proto.TelemetryResponse{
|
||||
Success: true,
|
||||
Message: "Telemetry data received",
|
||||
}
|
||||
|
||||
respData, err := protobuf.Marshal(resp)
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to marshal response", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/x-protobuf")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write(respData)
|
||||
}
|
||||
|
||||
func (h *Handler) GetStats(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
stats, err := h.storage.GetStats()
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to get stats", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(stats)
|
||||
}
|
||||
|
||||
func (h *Handler) GetInstances(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
limitStr := r.URL.Query().Get("limit")
|
||||
limit := 100 // default
|
||||
if limitStr != "" {
|
||||
if l, err := strconv.Atoi(limitStr); err == nil && l > 0 && l <= 1000 {
|
||||
limit = l
|
||||
}
|
||||
}
|
||||
|
||||
instances, err := h.storage.GetInstances(limit)
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to get instances", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(instances)
|
||||
}
|
||||
|
||||
func (h *Handler) GetMetrics(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
daysStr := r.URL.Query().Get("days")
|
||||
days := 30 // default
|
||||
if daysStr != "" {
|
||||
if d, err := strconv.Atoi(daysStr); err == nil && d > 0 && d <= 365 {
|
||||
days = d
|
||||
}
|
||||
}
|
||||
|
||||
metrics, err := h.storage.GetMetrics(days)
|
||||
if err != nil {
|
||||
http.Error(w, "Failed to get metrics", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(metrics)
|
||||
}
|
278
telemetry/server/dashboard/dashboard.go
Normal file
278
telemetry/server/dashboard/dashboard.go
Normal file
@@ -0,0 +1,278 @@
|
||||
package dashboard
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// Handler serves the built-in telemetry dashboard. It is stateless; all
// content is embedded in the handler itself.
type Handler struct{}

// NewHandler returns a dashboard Handler.
func NewHandler() *Handler {
	return &Handler{}
}

// ServeIndex renders the single-page dashboard. Markup, styling, and chart
// logic are inlined in one HTML string, so the handler needs no template
// files or static assets beyond the Chart.js CDN.
// NOTE(review): the page's JS reads stats.total_instances/active_instances/
// versions/os_distribution/deployments from /api/stats and metrics.dates/
// server_counts/disk_usage from /api/metrics — verify the API payloads
// actually use those keys.
func (h *Handler) ServeIndex(w http.ResponseWriter, r *http.Request) {
	html := `<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>SeaweedFS Telemetry Dashboard</title>
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            max-width: 1200px;
            margin: 0 auto;
        }
        .header {
            background: white;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .stats-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 20px;
            margin-bottom: 20px;
        }
        .stat-card {
            background: white;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .stat-value {
            font-size: 2em;
            font-weight: bold;
            color: #2196F3;
        }
        .stat-label {
            color: #666;
            margin-top: 5px;
        }
        .chart-container {
            background: white;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .chart-title {
            font-size: 1.2em;
            font-weight: bold;
            margin-bottom: 15px;
        }
        .loading {
            text-align: center;
            padding: 40px;
            color: #666;
        }
        .error {
            background: #ffebee;
            color: #c62828;
            padding: 15px;
            border-radius: 4px;
            margin: 10px 0;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>SeaweedFS Telemetry Dashboard</h1>
            <p>Privacy-respecting usage analytics for SeaweedFS</p>
        </div>

        <div id="loading" class="loading">Loading telemetry data...</div>
        <div id="error" class="error" style="display: none;"></div>

        <div id="dashboard" style="display: none;">
            <div class="stats-grid">
                <div class="stat-card">
                    <div class="stat-value" id="totalInstances">-</div>
                    <div class="stat-label">Total Instances (30 days)</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="activeInstances">-</div>
                    <div class="stat-label">Active Instances (7 days)</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="totalVersions">-</div>
                    <div class="stat-label">Different Versions</div>
                </div>
                <div class="stat-card">
                    <div class="stat-value" id="totalOS">-</div>
                    <div class="stat-label">Operating Systems</div>
                </div>
            </div>

            <div class="chart-container">
                <div class="chart-title">Version Distribution</div>
                <canvas id="versionChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Operating System Distribution</div>
                <canvas id="osChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Deployment Types</div>
                <canvas id="deploymentChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Volume Servers Over Time</div>
                <canvas id="serverChart" width="400" height="200"></canvas>
            </div>

            <div class="chart-container">
                <div class="chart-title">Total Disk Usage Over Time</div>
                <canvas id="diskChart" width="400" height="200"></canvas>
            </div>
        </div>
    </div>

    <script>
        let charts = {};

        async function loadDashboard() {
            try {
                // Load stats
                const statsResponse = await fetch('/api/stats');
                const stats = await statsResponse.json();

                // Load metrics
                const metricsResponse = await fetch('/api/metrics?days=30');
                const metrics = await metricsResponse.json();

                updateStats(stats);
                updateCharts(stats, metrics);

                document.getElementById('loading').style.display = 'none';
                document.getElementById('dashboard').style.display = 'block';
            } catch (error) {
                console.error('Error loading dashboard:', error);
                showError('Failed to load telemetry data: ' + error.message);
            }
        }

        function updateStats(stats) {
            document.getElementById('totalInstances').textContent = stats.total_instances || 0;
            document.getElementById('activeInstances').textContent = stats.active_instances || 0;
            document.getElementById('totalVersions').textContent = Object.keys(stats.versions || {}).length;
            document.getElementById('totalOS').textContent = Object.keys(stats.os_distribution || {}).length;
        }

        function updateCharts(stats, metrics) {
            // Version chart
            createPieChart('versionChart', 'Version Distribution', stats.versions || {});

            // OS chart
            createPieChart('osChart', 'Operating System Distribution', stats.os_distribution || {});

            // Deployment chart
            createPieChart('deploymentChart', 'Deployment Types', stats.deployments || {});

            // Server count over time
            if (metrics.dates && metrics.server_counts) {
                createLineChart('serverChart', 'Volume Servers', metrics.dates, metrics.server_counts, '#2196F3');
            }

            // Disk usage over time
            if (metrics.dates && metrics.disk_usage) {
                const diskUsageGB = metrics.disk_usage.map(bytes => Math.round(bytes / (1024 * 1024 * 1024)));
                createLineChart('diskChart', 'Disk Usage (GB)', metrics.dates, diskUsageGB, '#4CAF50');
            }
        }

        function createPieChart(canvasId, title, data) {
            const ctx = document.getElementById(canvasId).getContext('2d');

            if (charts[canvasId]) {
                charts[canvasId].destroy();
            }

            const labels = Object.keys(data);
            const values = Object.values(data);

            charts[canvasId] = new Chart(ctx, {
                type: 'pie',
                data: {
                    labels: labels,
                    datasets: [{
                        data: values,
                        backgroundColor: [
                            '#FF6384', '#36A2EB', '#FFCE56', '#4BC0C0',
                            '#9966FF', '#FF9F40', '#FF6384', '#C9CBCF'
                        ]
                    }]
                },
                options: {
                    responsive: true,
                    plugins: {
                        legend: {
                            position: 'bottom'
                        }
                    }
                }
            });
        }

        function createLineChart(canvasId, label, labels, data, color) {
            const ctx = document.getElementById(canvasId).getContext('2d');

            if (charts[canvasId]) {
                charts[canvasId].destroy();
            }

            charts[canvasId] = new Chart(ctx, {
                type: 'line',
                data: {
                    labels: labels,
                    datasets: [{
                        label: label,
                        data: data,
                        borderColor: color,
                        backgroundColor: color + '20',
                        fill: true,
                        tension: 0.1
                    }]
                },
                options: {
                    responsive: true,
                    scales: {
                        y: {
                            beginAtZero: true
                        }
                    }
                }
            });
        }

        function showError(message) {
            document.getElementById('loading').style.display = 'none';
            document.getElementById('error').style.display = 'block';
            document.getElementById('error').textContent = message;
        }

        // Load dashboard on page load
        loadDashboard();

        // Refresh every 5 minutes
        setInterval(loadDashboard, 5 * 60 * 1000);
    </script>
</body>
</html>`

	w.Header().Set("Content-Type", "text/html")
	w.WriteHeader(http.StatusOK)
	w.Write([]byte(html))
}
|
31
telemetry/server/go.sum
Normal file
31
telemetry/server/go.sum
Normal file
@@ -0,0 +1,31 @@
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
|
||||
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
||||
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
|
||||
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
|
||||
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4=
|
||||
github.com/prometheus/client_golang v1.17.0 h1:rl2sfwZMtSthVU752MqfjQozy7blglC+1SOtjMAMh+Q=
|
||||
github.com/prometheus/client_golang v1.17.0/go.mod h1:VeL+gMmOAxkS2IqfCq0ZmHSL+LjWfWDUmp1mBz9JgUY=
|
||||
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 h1:v7DLqVdK4VrYkVD5diGdl4sxJurKJEMnODWRJlxV9oM=
|
||||
github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16/go.mod h1:oMQmHW1/JoDwqLtg57MGgP/Fb1CJEYF2imWWhWtMkYU=
|
||||
github.com/prometheus/common v0.44.0 h1:+5BrQJwiBB9xsMygAB3TNvpQKOwlkc25LbISbrdOOfY=
|
||||
github.com/prometheus/common v0.44.0/go.mod h1:ofAIvZbQ1e/nugmZGz4/qCb9Ap1VoSTIO7x0VV9VvuY=
|
||||
github.com/prometheus/procfs v0.11.1 h1:xRC8Iq1yyca5ypa9n1EZnWZkt7dwcoRPQwX/5gwaUuI=
|
||||
github.com/prometheus/procfs v0.11.1/go.mod h1:eesXgaPo1q7lBpVMoMy0ZOFTth9hBn4W/y0/p/ScXhY=
|
||||
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
|
||||
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
|
||||
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
|
||||
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
|
||||
google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
|
111
telemetry/server/main.go
Normal file
111
telemetry/server/main.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/seaweedfs/seaweedfs/telemetry/server/api"
|
||||
"github.com/seaweedfs/seaweedfs/telemetry/server/dashboard"
|
||||
"github.com/seaweedfs/seaweedfs/telemetry/server/storage"
|
||||
)
|
||||
|
||||
// Command-line flags controlling server behavior. Defaults suit local
// development; the Makefile's run-prod target overrides cleanup and max-age.
var (
	port            = flag.Int("port", 8080, "HTTP server port")
	enableCORS      = flag.Bool("cors", true, "Enable CORS for dashboard")
	logRequests     = flag.Bool("log", true, "Log incoming requests")
	enableDashboard = flag.Bool("dashboard", true, "Enable built-in dashboard (optional when using Grafana)")
	cleanupInterval = flag.Duration("cleanup", 24*time.Hour, "Cleanup interval for old instances")
	maxInstanceAge  = flag.Duration("max-age", 30*24*time.Hour, "Maximum age for instances before cleanup")
)
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
// Create Prometheus storage instance
|
||||
store := storage.NewPrometheusStorage()
|
||||
|
||||
// Start cleanup routine
|
||||
go func() {
|
||||
ticker := time.NewTicker(*cleanupInterval)
|
||||
defer ticker.Stop()
|
||||
for range ticker.C {
|
||||
store.CleanupOldInstances(*maxInstanceAge)
|
||||
}
|
||||
}()
|
||||
|
||||
// Setup HTTP handlers
|
||||
mux := http.NewServeMux()
|
||||
|
||||
// Prometheus metrics endpoint
|
||||
mux.Handle("/metrics", promhttp.Handler())
|
||||
|
||||
// API endpoints
|
||||
apiHandler := api.NewHandler(store)
|
||||
mux.HandleFunc("/api/collect", corsMiddleware(logMiddleware(apiHandler.CollectTelemetry)))
|
||||
mux.HandleFunc("/api/stats", corsMiddleware(logMiddleware(apiHandler.GetStats)))
|
||||
mux.HandleFunc("/api/instances", corsMiddleware(logMiddleware(apiHandler.GetInstances)))
|
||||
mux.HandleFunc("/api/metrics", corsMiddleware(logMiddleware(apiHandler.GetMetrics)))
|
||||
|
||||
// Dashboard (optional)
|
||||
if *enableDashboard {
|
||||
dashboardHandler := dashboard.NewHandler()
|
||||
mux.HandleFunc("/", corsMiddleware(dashboardHandler.ServeIndex))
|
||||
mux.HandleFunc("/dashboard", corsMiddleware(dashboardHandler.ServeIndex))
|
||||
mux.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("./static"))))
|
||||
}
|
||||
|
||||
// Health check
|
||||
mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
json.NewEncoder(w).Encode(map[string]string{
|
||||
"status": "ok",
|
||||
"time": time.Now().UTC().Format(time.RFC3339),
|
||||
})
|
||||
})
|
||||
|
||||
addr := fmt.Sprintf(":%d", *port)
|
||||
log.Printf("Starting telemetry server on %s", addr)
|
||||
log.Printf("Prometheus metrics: http://localhost%s/metrics", addr)
|
||||
if *enableDashboard {
|
||||
log.Printf("Dashboard: http://localhost%s/dashboard", addr)
|
||||
}
|
||||
log.Printf("Cleanup interval: %v, Max instance age: %v", *cleanupInterval, *maxInstanceAge)
|
||||
|
||||
if err := http.ListenAndServe(addr, mux); err != nil {
|
||||
log.Fatalf("Server failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func corsMiddleware(next http.HandlerFunc) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
if *enableCORS {
|
||||
w.Header().Set("Access-Control-Allow-Origin", "*")
|
||||
w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
|
||||
w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
|
||||
}
|
||||
|
||||
if r.Method == "OPTIONS" {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return
|
||||
}
|
||||
|
||||
next(w, r)
|
||||
}
|
||||
}
|
||||
|
||||
func logMiddleware(next http.HandlerFunc) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
if *logRequests {
|
||||
start := time.Now()
|
||||
next(w, r)
|
||||
log.Printf("%s %s %s %v", r.Method, r.URL.Path, r.RemoteAddr, time.Since(start))
|
||||
} else {
|
||||
next(w, r)
|
||||
}
|
||||
}
|
||||
}
|
245
telemetry/server/storage/prometheus.go
Normal file
245
telemetry/server/storage/prometheus.go
Normal file
@@ -0,0 +1,245 @@
|
||||
package storage
|
||||
|
||||
import (
	"encoding/json"
	"sort"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/seaweedfs/seaweedfs/telemetry/proto"
)
|
||||
|
||||
// PrometheusStorage keeps telemetry in two forms: as Prometheus metrics
// (for scraping/Grafana) and in an in-memory map (serving the JSON API).
type PrometheusStorage struct {
	// Prometheus metrics
	totalClusters     prometheus.Gauge
	activeClusters    prometheus.Gauge
	volumeServerCount *prometheus.GaugeVec
	totalDiskBytes    *prometheus.GaugeVec
	totalVolumeCount  *prometheus.GaugeVec
	filerCount        *prometheus.GaugeVec
	brokerCount       *prometheus.GaugeVec
	clusterInfo       *prometheus.GaugeVec
	telemetryReceived prometheus.Counter

	// In-memory storage for API endpoints (if needed)
	mu        sync.RWMutex              // guards instances and stats
	instances map[string]*telemetryData // latest report per cluster, keyed by cluster_id
	stats     map[string]interface{}    // cached aggregates, recomputed on each store
}

// telemetryData is an internal struct that includes the received timestamp
// alongside the reported protobuf payload.
type telemetryData struct {
	*proto.TelemetryData
	ReceivedAt time.Time `json:"received_at"`
}
|
||||
|
||||
// NewPrometheusStorage creates the storage layer and registers all
// telemetry metrics with the default Prometheus registry (via promauto).
// Metric names are a public scraping contract; renaming them breaks
// existing dashboards and alerts.
func NewPrometheusStorage() *PrometheusStorage {
	return &PrometheusStorage{
		totalClusters: promauto.NewGauge(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_total_clusters",
			Help: "Total number of unique SeaweedFS clusters (last 30 days)",
		}),
		activeClusters: promauto.NewGauge(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_active_clusters",
			Help: "Number of active SeaweedFS clusters (last 7 days)",
		}),
		volumeServerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_volume_servers",
			Help: "Number of volume servers per cluster",
		}, []string{"cluster_id", "version", "os", "deployment"}),
		totalDiskBytes: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_disk_bytes",
			Help: "Total disk usage in bytes per cluster",
		}, []string{"cluster_id", "version", "os", "deployment"}),
		totalVolumeCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_volume_count",
			Help: "Total number of volumes per cluster",
		}, []string{"cluster_id", "version", "os", "deployment"}),
		filerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_filer_count",
			Help: "Number of filer servers per cluster",
		}, []string{"cluster_id", "version", "os", "deployment"}),
		brokerCount: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_broker_count",
			Help: "Number of broker servers per cluster",
		}, []string{"cluster_id", "version", "os", "deployment"}),
		// clusterInfo is an "info-style" metric: the value is always 1 and
		// the metadata lives in the labels (including the features list).
		clusterInfo: promauto.NewGaugeVec(prometheus.GaugeOpts{
			Name: "seaweedfs_telemetry_cluster_info",
			Help: "Cluster information (always 1, labels contain metadata)",
		}, []string{"cluster_id", "version", "os", "deployment", "features"}),
		telemetryReceived: promauto.NewCounter(prometheus.CounterOpts{
			Name: "seaweedfs_telemetry_reports_received_total",
			Help: "Total number of telemetry reports received",
		}),
		instances: make(map[string]*telemetryData),
		stats:     make(map[string]interface{}),
	}
}
|
||||
|
||||
// StoreTelemetry records one telemetry report: it updates the per-cluster
// Prometheus gauges, bumps the received counter, remembers the report as
// the cluster's latest instance, and refreshes the cached aggregate stats.
// It always returns nil; the error return exists for interface symmetry.
func (s *PrometheusStorage) StoreTelemetry(data *proto.TelemetryData) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	// Update Prometheus metrics
	labels := prometheus.Labels{
		"cluster_id": data.ClusterId,
		"version":    data.Version,
		"os":         data.Os,
		"deployment": data.Deployment,
	}

	s.volumeServerCount.With(labels).Set(float64(data.VolumeServerCount))
	s.totalDiskBytes.With(labels).Set(float64(data.TotalDiskBytes))
	s.totalVolumeCount.With(labels).Set(float64(data.TotalVolumeCount))
	s.filerCount.With(labels).Set(float64(data.FilerCount))
	s.brokerCount.With(labels).Set(float64(data.BrokerCount))

	// Features as JSON string for the label.
	// Marshal of a []string cannot fail, so the error is deliberately ignored.
	featuresJSON, _ := json.Marshal(data.Features)
	infoLabels := prometheus.Labels{
		"cluster_id": data.ClusterId,
		"version":    data.Version,
		"os":         data.Os,
		"deployment": data.Deployment,
		"features":   string(featuresJSON),
	}
	s.clusterInfo.With(infoLabels).Set(1)

	s.telemetryReceived.Inc()

	// Store in memory for API endpoints; overwrites any earlier report
	// from the same cluster.
	s.instances[data.ClusterId] = &telemetryData{
		TelemetryData: data,
		ReceivedAt:    time.Now().UTC(),
	}

	// Update aggregated stats
	s.updateStats()

	return nil
}
|
||||
|
||||
func (s *PrometheusStorage) GetStats() (map[string]interface{}, error) {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
// Return cached stats
|
||||
result := make(map[string]interface{})
|
||||
for k, v := range s.stats {
|
||||
result[k] = v
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *PrometheusStorage) GetInstances(limit int) ([]*telemetryData, error) {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
var instances []*telemetryData
|
||||
count := 0
|
||||
for _, instance := range s.instances {
|
||||
if count >= limit {
|
||||
break
|
||||
}
|
||||
instances = append(instances, instance)
|
||||
count++
|
||||
}
|
||||
|
||||
return instances, nil
|
||||
}
|
||||
|
||||
func (s *PrometheusStorage) GetMetrics(days int) (map[string]interface{}, error) {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
// Return current metrics from in-memory storage
|
||||
// Historical data should be queried from Prometheus directly
|
||||
cutoff := time.Now().AddDate(0, 0, -days)
|
||||
|
||||
var volumeServers []map[string]interface{}
|
||||
var diskUsage []map[string]interface{}
|
||||
|
||||
for _, instance := range s.instances {
|
||||
if instance.ReceivedAt.After(cutoff) {
|
||||
volumeServers = append(volumeServers, map[string]interface{}{
|
||||
"date": instance.ReceivedAt.Format("2006-01-02"),
|
||||
"value": instance.TelemetryData.VolumeServerCount,
|
||||
})
|
||||
diskUsage = append(diskUsage, map[string]interface{}{
|
||||
"date": instance.ReceivedAt.Format("2006-01-02"),
|
||||
"value": instance.TelemetryData.TotalDiskBytes,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return map[string]interface{}{
|
||||
"volume_servers": volumeServers,
|
||||
"disk_usage": diskUsage,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (s *PrometheusStorage) updateStats() {
|
||||
now := time.Now()
|
||||
last7Days := now.AddDate(0, 0, -7)
|
||||
last30Days := now.AddDate(0, 0, -30)
|
||||
|
||||
totalInstances := 0
|
||||
activeInstances := 0
|
||||
versions := make(map[string]int)
|
||||
osDistribution := make(map[string]int)
|
||||
deployments := make(map[string]int)
|
||||
|
||||
for _, instance := range s.instances {
|
||||
if instance.ReceivedAt.After(last30Days) {
|
||||
totalInstances++
|
||||
}
|
||||
if instance.ReceivedAt.After(last7Days) {
|
||||
activeInstances++
|
||||
versions[instance.TelemetryData.Version]++
|
||||
osDistribution[instance.TelemetryData.Os]++
|
||||
deployments[instance.TelemetryData.Deployment]++
|
||||
}
|
||||
}
|
||||
|
||||
// Update Prometheus gauges
|
||||
s.totalClusters.Set(float64(totalInstances))
|
||||
s.activeClusters.Set(float64(activeInstances))
|
||||
|
||||
// Update cached stats for API
|
||||
s.stats = map[string]interface{}{
|
||||
"total_instances": totalInstances,
|
||||
"active_instances": activeInstances,
|
||||
"versions": versions,
|
||||
"os_distribution": osDistribution,
|
||||
"deployments": deployments,
|
||||
}
|
||||
}
|
||||
|
||||
// CleanupOldInstances removes instances older than the specified duration
|
||||
func (s *PrometheusStorage) CleanupOldInstances(maxAge time.Duration) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
cutoff := time.Now().Add(-maxAge)
|
||||
for instanceID, instance := range s.instances {
|
||||
if instance.ReceivedAt.Before(cutoff) {
|
||||
delete(s.instances, instanceID)
|
||||
|
||||
// Remove from Prometheus metrics
|
||||
labels := prometheus.Labels{
|
||||
"cluster_id": instance.TelemetryData.ClusterId,
|
||||
"version": instance.TelemetryData.Version,
|
||||
"os": instance.TelemetryData.Os,
|
||||
"deployment": instance.TelemetryData.Deployment,
|
||||
}
|
||||
s.volumeServerCount.Delete(labels)
|
||||
s.totalDiskBytes.Delete(labels)
|
||||
s.totalVolumeCount.Delete(labels)
|
||||
s.filerCount.Delete(labels)
|
||||
s.brokerCount.Delete(labels)
|
||||
}
|
||||
}
|
||||
|
||||
s.updateStats()
|
||||
}
|
315
telemetry/test/integration.go
Normal file
315
telemetry/test/integration.go
Normal file
@@ -0,0 +1,315 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/telemetry/proto"
|
||||
"github.com/seaweedfs/seaweedfs/weed/telemetry"
|
||||
protobuf "google.golang.org/protobuf/proto"
|
||||
)
|
||||
|
||||
const (
|
||||
serverPort = "18080" // Use different port to avoid conflicts
|
||||
serverURL = "http://localhost:" + serverPort
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Println("🧪 Starting SeaweedFS Telemetry Integration Test")
|
||||
|
||||
// Start telemetry server
|
||||
fmt.Println("📡 Starting telemetry server...")
|
||||
serverCmd, err := startTelemetryServer()
|
||||
if err != nil {
|
||||
log.Fatalf("❌ Failed to start telemetry server: %v", err)
|
||||
}
|
||||
defer stopServer(serverCmd)
|
||||
|
||||
// Wait for server to start
|
||||
if !waitForServer(serverURL+"/health", 15*time.Second) {
|
||||
log.Fatal("❌ Telemetry server failed to start")
|
||||
}
|
||||
fmt.Println("✅ Telemetry server started successfully")
|
||||
|
||||
// Test protobuf marshaling first
|
||||
fmt.Println("🔧 Testing protobuf marshaling...")
|
||||
if err := testProtobufMarshaling(); err != nil {
|
||||
log.Fatalf("❌ Protobuf marshaling test failed: %v", err)
|
||||
}
|
||||
fmt.Println("✅ Protobuf marshaling test passed")
|
||||
|
||||
// Test protobuf client
|
||||
fmt.Println("🔄 Testing protobuf telemetry client...")
|
||||
if err := testTelemetryClient(); err != nil {
|
||||
log.Fatalf("❌ Telemetry client test failed: %v", err)
|
||||
}
|
||||
fmt.Println("✅ Telemetry client test passed")
|
||||
|
||||
// Test server metrics endpoint
|
||||
fmt.Println("📊 Testing Prometheus metrics endpoint...")
|
||||
if err := testMetricsEndpoint(); err != nil {
|
||||
log.Fatalf("❌ Metrics endpoint test failed: %v", err)
|
||||
}
|
||||
fmt.Println("✅ Metrics endpoint test passed")
|
||||
|
||||
// Test stats API
|
||||
fmt.Println("📈 Testing stats API...")
|
||||
if err := testStatsAPI(); err != nil {
|
||||
log.Fatalf("❌ Stats API test failed: %v", err)
|
||||
}
|
||||
fmt.Println("✅ Stats API test passed")
|
||||
|
||||
// Test instances API
|
||||
fmt.Println("📋 Testing instances API...")
|
||||
if err := testInstancesAPI(); err != nil {
|
||||
log.Fatalf("❌ Instances API test failed: %v", err)
|
||||
}
|
||||
fmt.Println("✅ Instances API test passed")
|
||||
|
||||
fmt.Println("🎉 All telemetry integration tests passed!")
|
||||
}
|
||||
|
||||
func startTelemetryServer() (*exec.Cmd, error) {
|
||||
// Get the directory where this test is running
|
||||
testDir, err := os.Getwd()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get working directory: %v", err)
|
||||
}
|
||||
|
||||
// Navigate to the server directory (from main seaweedfs directory)
|
||||
serverDir := filepath.Join(testDir, "telemetry", "server")
|
||||
|
||||
cmd := exec.Command("go", "run", ".",
|
||||
"-port="+serverPort,
|
||||
"-dashboard=false",
|
||||
"-cleanup=1m",
|
||||
"-max-age=1h")
|
||||
|
||||
cmd.Dir = serverDir
|
||||
|
||||
// Create log files for server output
|
||||
logFile, err := os.Create("telemetry-server-test.log")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create log file: %v", err)
|
||||
}
|
||||
|
||||
cmd.Stdout = logFile
|
||||
cmd.Stderr = logFile
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return nil, fmt.Errorf("failed to start server: %v", err)
|
||||
}
|
||||
|
||||
return cmd, nil
|
||||
}
|
||||
|
||||
// stopServer terminates the telemetry server process with SIGTERM, waits for
// it to exit, and removes its log file. Safe to call with a nil command.
func stopServer(cmd *exec.Cmd) {
	if cmd == nil || cmd.Process == nil {
		return
	}
	cmd.Process.Signal(syscall.SIGTERM)
	cmd.Wait()
	// Best-effort cleanup of the server's log output.
	os.Remove("telemetry-server-test.log")
}
|
||||
|
||||
func waitForServer(url string, timeout time.Duration) bool {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
|
||||
fmt.Printf("⏳ Waiting for server at %s...\n", url)
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
default:
|
||||
resp, err := http.Get(url)
|
||||
if err == nil {
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
return true
|
||||
}
|
||||
}
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func testProtobufMarshaling() error {
|
||||
// Test protobuf marshaling/unmarshaling
|
||||
testData := &proto.TelemetryData{
|
||||
ClusterId: "test-cluster-12345",
|
||||
Version: "test-3.45",
|
||||
Os: "linux/amd64",
|
||||
Features: []string{"filer", "s3api"},
|
||||
Deployment: "test",
|
||||
VolumeServerCount: 2,
|
||||
TotalDiskBytes: 1000000,
|
||||
TotalVolumeCount: 10,
|
||||
FilerCount: 1,
|
||||
BrokerCount: 1,
|
||||
Timestamp: time.Now().Unix(),
|
||||
}
|
||||
|
||||
// Marshal
|
||||
data, err := protobuf.Marshal(testData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal protobuf: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf(" Protobuf size: %d bytes\n", len(data))
|
||||
|
||||
// Unmarshal
|
||||
testData2 := &proto.TelemetryData{}
|
||||
if err := protobuf.Unmarshal(data, testData2); err != nil {
|
||||
return fmt.Errorf("failed to unmarshal protobuf: %v", err)
|
||||
}
|
||||
|
||||
// Verify data
|
||||
if testData2.ClusterId != testData.ClusterId {
|
||||
return fmt.Errorf("protobuf data mismatch: expected %s, got %s",
|
||||
testData.ClusterId, testData2.ClusterId)
|
||||
}
|
||||
|
||||
if testData2.VolumeServerCount != testData.VolumeServerCount {
|
||||
return fmt.Errorf("volume server count mismatch: expected %d, got %d",
|
||||
testData.VolumeServerCount, testData2.VolumeServerCount)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func testTelemetryClient() error {
|
||||
// Create telemetry client
|
||||
client := telemetry.NewClient(serverURL+"/api/collect", true)
|
||||
|
||||
// Create test data using protobuf format
|
||||
testData := &proto.TelemetryData{
|
||||
Version: "test-3.45",
|
||||
Os: "linux/amd64",
|
||||
Features: []string{"filer", "s3api", "mq"},
|
||||
Deployment: "integration-test",
|
||||
VolumeServerCount: 3,
|
||||
TotalDiskBytes: 1073741824, // 1GB
|
||||
TotalVolumeCount: 50,
|
||||
FilerCount: 2,
|
||||
BrokerCount: 1,
|
||||
Timestamp: time.Now().Unix(),
|
||||
}
|
||||
|
||||
// Send telemetry data
|
||||
if err := client.SendTelemetry(testData); err != nil {
|
||||
return fmt.Errorf("failed to send telemetry: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf(" Sent telemetry for cluster: %s\n", client.GetInstanceID())
|
||||
|
||||
// Wait a bit for processing
|
||||
time.Sleep(2 * time.Second)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func testMetricsEndpoint() error {
|
||||
resp, err := http.Get(serverURL + "/metrics")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get metrics: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("metrics endpoint returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// Read response and check for expected metrics
|
||||
content, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read metrics response: %v", err)
|
||||
}
|
||||
|
||||
contentStr := string(content)
|
||||
expectedMetrics := []string{
|
||||
"seaweedfs_telemetry_total_clusters",
|
||||
"seaweedfs_telemetry_active_clusters",
|
||||
"seaweedfs_telemetry_reports_received_total",
|
||||
"seaweedfs_telemetry_volume_servers",
|
||||
"seaweedfs_telemetry_disk_bytes",
|
||||
"seaweedfs_telemetry_volume_count",
|
||||
"seaweedfs_telemetry_filer_count",
|
||||
"seaweedfs_telemetry_broker_count",
|
||||
}
|
||||
|
||||
for _, metric := range expectedMetrics {
|
||||
if !strings.Contains(contentStr, metric) {
|
||||
return fmt.Errorf("missing expected metric: %s", metric)
|
||||
}
|
||||
}
|
||||
|
||||
// Check that we have at least one report received
|
||||
if !strings.Contains(contentStr, "seaweedfs_telemetry_reports_received_total 1") {
|
||||
fmt.Printf(" Warning: Expected at least 1 report received, metrics content:\n%s\n", contentStr)
|
||||
}
|
||||
|
||||
fmt.Printf(" Found %d expected metrics\n", len(expectedMetrics))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func testStatsAPI() error {
|
||||
resp, err := http.Get(serverURL + "/api/stats")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get stats: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("stats API returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// Read and verify JSON response
|
||||
content, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read stats response: %v", err)
|
||||
}
|
||||
|
||||
contentStr := string(content)
|
||||
if !strings.Contains(contentStr, "total_instances") {
|
||||
return fmt.Errorf("stats response missing total_instances field")
|
||||
}
|
||||
|
||||
fmt.Printf(" Stats response: %s\n", contentStr)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func testInstancesAPI() error {
|
||||
resp, err := http.Get(serverURL + "/api/instances?limit=10")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get instances: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("instances API returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// Read response
|
||||
content, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read instances response: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf(" Instances response length: %d bytes\n", len(content))
|
||||
|
||||
return nil
|
||||
}
|
@@ -61,6 +61,8 @@ type MasterOptions struct {
|
||||
electionTimeout *time.Duration
|
||||
raftHashicorp *bool
|
||||
raftBootstrap *bool
|
||||
telemetryUrl *string
|
||||
telemetryEnabled *bool
|
||||
}
|
||||
|
||||
func init() {
|
||||
@@ -88,6 +90,8 @@ func init() {
|
||||
m.electionTimeout = cmdMaster.Flag.Duration("electionTimeout", 10*time.Second, "election timeout of master servers")
|
||||
m.raftHashicorp = cmdMaster.Flag.Bool("raftHashicorp", false, "use hashicorp raft")
|
||||
m.raftBootstrap = cmdMaster.Flag.Bool("raftBootstrap", false, "Whether to bootstrap the Raft cluster")
|
||||
m.telemetryUrl = cmdMaster.Flag.String("telemetry.url", "https://telemetry.seaweedfs.com:3091/api/collect", "telemetry server URL to send usage statistics")
|
||||
m.telemetryEnabled = cmdMaster.Flag.Bool("telemetry", false, "enable telemetry reporting")
|
||||
}
|
||||
|
||||
var cmdMaster = &Command{
|
||||
@@ -332,5 +336,7 @@ func (m *MasterOptions) toMasterOption(whiteList []string) *weed_server.MasterOp
|
||||
DisableHttp: *m.disableHttp,
|
||||
MetricsAddress: *m.metricsAddress,
|
||||
MetricsIntervalSec: *m.metricsIntervalSec,
|
||||
TelemetryUrl: *m.telemetryUrl,
|
||||
TelemetryEnabled: *m.telemetryEnabled,
|
||||
}
|
||||
}
|
||||
|
@@ -104,6 +104,8 @@ func init() {
|
||||
masterOptions.raftBootstrap = cmdServer.Flag.Bool("master.raftBootstrap", false, "Whether to bootstrap the Raft cluster")
|
||||
masterOptions.heartbeatInterval = cmdServer.Flag.Duration("master.heartbeatInterval", 300*time.Millisecond, "heartbeat interval of master servers, and will be randomly multiplied by [1, 1.25)")
|
||||
masterOptions.electionTimeout = cmdServer.Flag.Duration("master.electionTimeout", 10*time.Second, "election timeout of master servers")
|
||||
masterOptions.telemetryUrl = cmdServer.Flag.String("master.telemetry.url", "https://telemetry.seaweedfs.com:3091/api/collect", "telemetry server URL to send usage statistics")
|
||||
masterOptions.telemetryEnabled = cmdServer.Flag.Bool("master.telemetry", false, "enable telemetry reporting")
|
||||
|
||||
filerOptions.filerGroup = cmdServer.Flag.String("filer.filerGroup", "", "share metadata with other filers in the same filerGroup")
|
||||
filerOptions.collection = cmdServer.Flag.String("filer.collection", "", "all data will be stored in this collection")
|
||||
|
@@ -8,11 +8,13 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/stats"
|
||||
"github.com/seaweedfs/seaweedfs/weed/telemetry"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/cluster"
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb"
|
||||
@@ -30,6 +32,7 @@ import (
|
||||
"github.com/seaweedfs/seaweedfs/weed/topology"
|
||||
"github.com/seaweedfs/seaweedfs/weed/util"
|
||||
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
|
||||
"github.com/seaweedfs/seaweedfs/weed/util/version"
|
||||
"github.com/seaweedfs/seaweedfs/weed/wdclient"
|
||||
)
|
||||
|
||||
@@ -52,6 +55,8 @@ type MasterOption struct {
|
||||
MetricsAddress string
|
||||
MetricsIntervalSec int
|
||||
IsFollower bool
|
||||
TelemetryUrl string
|
||||
TelemetryEnabled bool
|
||||
}
|
||||
|
||||
type MasterServer struct {
|
||||
@@ -76,6 +81,9 @@ type MasterServer struct {
|
||||
adminLocks *AdminLocks
|
||||
|
||||
Cluster *cluster.Cluster
|
||||
|
||||
// telemetry
|
||||
telemetryCollector *telemetry.Collector
|
||||
}
|
||||
|
||||
func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.ServerAddress) *MasterServer {
|
||||
@@ -131,6 +139,28 @@ func NewMasterServer(r *mux.Router, option *MasterOption, peers map[string]pb.Se
|
||||
ms.vg = topology.NewDefaultVolumeGrowth()
|
||||
glog.V(0).Infoln("Volume Size Limit is", ms.option.VolumeSizeLimitMB, "MB")
|
||||
|
||||
// Initialize telemetry after topology is created
|
||||
if option.TelemetryEnabled && option.TelemetryUrl != "" {
|
||||
telemetryClient := telemetry.NewClient(option.TelemetryUrl, option.TelemetryEnabled)
|
||||
ms.telemetryCollector = telemetry.NewCollector(telemetryClient, ms.Topo, ms.Cluster)
|
||||
ms.telemetryCollector.SetMasterServer(ms)
|
||||
|
||||
// Set version and OS information
|
||||
ms.telemetryCollector.SetVersion(version.VERSION_NUMBER)
|
||||
ms.telemetryCollector.SetOS(runtime.GOOS + "/" + runtime.GOARCH)
|
||||
|
||||
// Determine features and deployment type
|
||||
features := []string{"master"}
|
||||
if len(peers) > 1 {
|
||||
features = append(features, "cluster")
|
||||
}
|
||||
ms.telemetryCollector.SetFeatures(features)
|
||||
ms.telemetryCollector.SetDeployment(telemetry.DetermineDeployment(true, false, len(peers)))
|
||||
|
||||
// Start periodic telemetry collection (every 24 hours)
|
||||
ms.telemetryCollector.StartPeriodicCollection(24 * time.Hour)
|
||||
}
|
||||
|
||||
ms.guard = security.NewGuard(append(ms.option.WhiteList, whiteList...), signingKey, expiresAfterSec, readSigningKey, readExpiresAfterSec)
|
||||
|
||||
handleStaticResources2(r)
|
||||
|
100
weed/telemetry/client.go
Normal file
100
weed/telemetry/client.go
Normal file
@@ -0,0 +1,100 @@
|
||||
package telemetry
|
||||
|
||||
import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"time"

	"github.com/google/uuid"
	"github.com/seaweedfs/seaweedfs/telemetry/proto"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	protobuf "google.golang.org/protobuf/proto"
)
|
||||
|
||||
// Client posts telemetry reports to a remote collection endpoint.
type Client struct {
	url        string       // collection endpoint URL; empty disables sending
	enabled    bool         // whether telemetry reporting is enabled
	instanceID string       // anonymous per-process cluster ID (in-memory UUID)
	httpClient *http.Client // shared HTTP client with a request timeout
}
||||
|
||||
// NewClient creates a new telemetry client
|
||||
func NewClient(url string, enabled bool) *Client {
|
||||
return &Client{
|
||||
url: url,
|
||||
enabled: enabled,
|
||||
instanceID: uuid.New().String(), // Generate UUID in memory only
|
||||
httpClient: &http.Client{
|
||||
Timeout: 10 * time.Second,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// IsEnabled returns whether telemetry is enabled
|
||||
func (c *Client) IsEnabled() bool {
|
||||
return c.enabled && c.url != ""
|
||||
}
|
||||
|
||||
// SendTelemetry sends telemetry data synchronously using protobuf format
|
||||
func (c *Client) SendTelemetry(data *proto.TelemetryData) error {
|
||||
if !c.IsEnabled() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set the cluster ID
|
||||
data.ClusterId = c.instanceID
|
||||
|
||||
return c.sendProtobuf(data)
|
||||
}
|
||||
|
||||
// SendTelemetryAsync sends telemetry data asynchronously
|
||||
func (c *Client) SendTelemetryAsync(data *proto.TelemetryData) {
|
||||
if !c.IsEnabled() {
|
||||
return
|
||||
}
|
||||
|
||||
go func() {
|
||||
if err := c.SendTelemetry(data); err != nil {
|
||||
glog.V(1).Infof("Failed to send telemetry: %v", err)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// sendProtobuf sends data using protobuf format
|
||||
func (c *Client) sendProtobuf(data *proto.TelemetryData) error {
|
||||
req := &proto.TelemetryRequest{
|
||||
Data: data,
|
||||
}
|
||||
|
||||
body, err := protobuf.Marshal(req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal protobuf: %v", err)
|
||||
}
|
||||
|
||||
httpReq, err := http.NewRequest("POST", c.url, bytes.NewBuffer(body))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create request: %v", err)
|
||||
}
|
||||
|
||||
httpReq.Header.Set("Content-Type", "application/x-protobuf")
|
||||
httpReq.Header.Set("User-Agent", fmt.Sprintf("SeaweedFS/%s", data.Version))
|
||||
|
||||
resp, err := c.httpClient.Do(httpReq)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to send request: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("server returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
glog.V(2).Infof("Telemetry sent successfully via protobuf")
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetInstanceID returns the current instance ID
|
||||
func (c *Client) GetInstanceID() string {
|
||||
return c.instanceID
|
||||
}
|
218
weed/telemetry/collector.go
Normal file
218
weed/telemetry/collector.go
Normal file
@@ -0,0 +1,218 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/telemetry/proto"
|
||||
"github.com/seaweedfs/seaweedfs/weed/cluster"
|
||||
"github.com/seaweedfs/seaweedfs/weed/glog"
|
||||
"github.com/seaweedfs/seaweedfs/weed/topology"
|
||||
)
|
||||
|
||||
type Collector struct {
|
||||
client *Client
|
||||
topo *topology.Topology
|
||||
cluster *cluster.Cluster
|
||||
masterServer interface{} // Will be set to *weed_server.MasterServer to access client tracking
|
||||
features []string
|
||||
deployment string
|
||||
version string
|
||||
os string
|
||||
}
|
||||
|
||||
// NewCollector creates a new telemetry collector
|
||||
func NewCollector(client *Client, topo *topology.Topology, cluster *cluster.Cluster) *Collector {
|
||||
return &Collector{
|
||||
client: client,
|
||||
topo: topo,
|
||||
cluster: cluster,
|
||||
masterServer: nil,
|
||||
features: []string{},
|
||||
deployment: "unknown",
|
||||
version: "unknown",
|
||||
os: "unknown",
|
||||
}
|
||||
}
|
||||
|
||||
// SetFeatures sets the list of enabled features
|
||||
func (c *Collector) SetFeatures(features []string) {
|
||||
c.features = features
|
||||
}
|
||||
|
||||
// SetDeployment sets the deployment type (standalone, cluster, etc.)
|
||||
func (c *Collector) SetDeployment(deployment string) {
|
||||
c.deployment = deployment
|
||||
}
|
||||
|
||||
// SetVersion sets the SeaweedFS version
|
||||
func (c *Collector) SetVersion(version string) {
|
||||
c.version = version
|
||||
}
|
||||
|
||||
// SetOS sets the operating system information
|
||||
func (c *Collector) SetOS(os string) {
|
||||
c.os = os
|
||||
}
|
||||
|
||||
// SetMasterServer sets a reference to the master server for client tracking
|
||||
func (c *Collector) SetMasterServer(masterServer interface{}) {
|
||||
c.masterServer = masterServer
|
||||
}
|
||||
|
||||
// CollectAndSendAsync collects telemetry data and sends it asynchronously
|
||||
func (c *Collector) CollectAndSendAsync() {
|
||||
if !c.client.IsEnabled() {
|
||||
return
|
||||
}
|
||||
|
||||
go func() {
|
||||
data := c.collectData()
|
||||
c.client.SendTelemetryAsync(data)
|
||||
}()
|
||||
}
|
||||
|
||||
// StartPeriodicCollection starts sending telemetry data periodically
|
||||
func (c *Collector) StartPeriodicCollection(interval time.Duration) {
|
||||
if !c.client.IsEnabled() {
|
||||
glog.V(1).Infof("Telemetry is disabled, skipping periodic collection")
|
||||
return
|
||||
}
|
||||
|
||||
glog.V(0).Infof("Starting telemetry collection every %v", interval)
|
||||
|
||||
// Send initial telemetry after a short delay
|
||||
go func() {
|
||||
time.Sleep(30 * time.Second) // Wait for cluster to stabilize
|
||||
c.CollectAndSendAsync()
|
||||
}()
|
||||
|
||||
// Start periodic collection
|
||||
ticker := time.NewTicker(interval)
|
||||
go func() {
|
||||
defer ticker.Stop()
|
||||
for range ticker.C {
|
||||
c.CollectAndSendAsync()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// collectData gathers telemetry data from the topology
|
||||
func (c *Collector) collectData() *proto.TelemetryData {
|
||||
data := &proto.TelemetryData{
|
||||
Version: c.version,
|
||||
Os: c.os,
|
||||
Features: c.features,
|
||||
Deployment: c.deployment,
|
||||
Timestamp: time.Now().Unix(),
|
||||
}
|
||||
|
||||
if c.topo != nil {
|
||||
// Collect volume server count
|
||||
data.VolumeServerCount = int32(c.countVolumeServers())
|
||||
|
||||
// Collect total disk usage and volume count
|
||||
diskBytes, volumeCount := c.collectVolumeStats()
|
||||
data.TotalDiskBytes = diskBytes
|
||||
data.TotalVolumeCount = int32(volumeCount)
|
||||
}
|
||||
|
||||
if c.cluster != nil {
|
||||
// Collect filer and broker counts
|
||||
data.FilerCount = int32(c.countFilers())
|
||||
data.BrokerCount = int32(c.countBrokers())
|
||||
}
|
||||
|
||||
return data
|
||||
}
|
||||
|
||||
// countVolumeServers counts the number of active volume servers
|
||||
func (c *Collector) countVolumeServers() int {
|
||||
count := 0
|
||||
for _, dcNode := range c.topo.Children() {
|
||||
dc := dcNode.(*topology.DataCenter)
|
||||
for _, rackNode := range dc.Children() {
|
||||
rack := rackNode.(*topology.Rack)
|
||||
for range rack.Children() {
|
||||
count++
|
||||
}
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// collectVolumeStats collects total disk usage and volume count
|
||||
func (c *Collector) collectVolumeStats() (uint64, int) {
|
||||
var totalDiskBytes uint64
|
||||
var totalVolumeCount int
|
||||
|
||||
for _, dcNode := range c.topo.Children() {
|
||||
dc := dcNode.(*topology.DataCenter)
|
||||
for _, rackNode := range dc.Children() {
|
||||
rack := rackNode.(*topology.Rack)
|
||||
for _, dnNode := range rack.Children() {
|
||||
dn := dnNode.(*topology.DataNode)
|
||||
volumes := dn.GetVolumes()
|
||||
for _, volumeInfo := range volumes {
|
||||
totalVolumeCount++
|
||||
totalDiskBytes += volumeInfo.Size
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return totalDiskBytes, totalVolumeCount
|
||||
}
|
||||
|
||||
// countFilers counts the number of active filer servers across all groups
|
||||
func (c *Collector) countFilers() int {
|
||||
// Count all filer-type nodes in the cluster
|
||||
// This includes both pure filer servers and S3 servers (which register as filers)
|
||||
count := 0
|
||||
for _, groupName := range c.getAllFilerGroups() {
|
||||
nodes := c.cluster.ListClusterNode(cluster.FilerGroupName(groupName), cluster.FilerType)
|
||||
count += len(nodes)
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// countBrokers counts the number of active broker servers
|
||||
func (c *Collector) countBrokers() int {
|
||||
// Count brokers across all broker groups
|
||||
count := 0
|
||||
for _, groupName := range c.getAllBrokerGroups() {
|
||||
nodes := c.cluster.ListClusterNode(cluster.FilerGroupName(groupName), cluster.BrokerType)
|
||||
count += len(nodes)
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
// getAllFilerGroups returns all filer group names
|
||||
func (c *Collector) getAllFilerGroups() []string {
|
||||
// For simplicity, we check the default group
|
||||
// In a more sophisticated implementation, we could enumerate all groups
|
||||
return []string{""}
|
||||
}
|
||||
|
||||
// getAllBrokerGroups returns all broker group names
|
||||
func (c *Collector) getAllBrokerGroups() []string {
|
||||
// For simplicity, we check the default group
|
||||
// In a more sophisticated implementation, we could enumerate all groups
|
||||
return []string{""}
|
||||
}
|
||||
|
||||
// DetermineDeployment classifies the node's deployment based on which roles
// are enabled and how many master peers are configured: "cluster",
// "standalone", "master-only", "volume-only", or "unknown".
func DetermineDeployment(isMasterEnabled, isVolumeEnabled bool, peerCount int) string {
	switch {
	case isMasterEnabled && isVolumeEnabled && peerCount > 1:
		return "cluster"
	case isMasterEnabled && isVolumeEnabled:
		return "standalone"
	case isMasterEnabled:
		return "master-only"
	case isVolumeEnabled:
		return "volume-only"
	default:
		return "unknown"
	}
}
|
Reference in New Issue
Block a user