Fixed critical bugs in the Azure SDK migration (PR #7310); fixes https://github.com/seaweedfs/seaweedfs/issues/5044

* purge emojis
* conditional delete
* Update azure_sink_test.go
* refactoring
* add context to each call
* address comments
* defer
* DeleteSnapshots: the conditional delete in handleExistingBlob was missing DeleteSnapshots, which would cause the delete operation to fail on Azure storage accounts that have blob snapshots enabled
* ensure the expected size
* adjust comment

package azure

import (
	"context"
	"fmt"
	"io"
	"os"
	"reflect"
	"regexp"
	"strings"
	"time"

	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
	"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
	"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
	"github.com/seaweedfs/seaweedfs/weed/remote_storage"
	"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
	"github.com/seaweedfs/seaweedfs/weed/util"
)

const (
	defaultBlockSize   = 4 * 1024 * 1024
	defaultConcurrency = 16

	// DefaultAzureOpTimeout is the timeout for individual Azure blob operations.
	// This should be larger than the maximum time the Azure SDK client will spend
	// retrying. With MaxRetries=3 (4 total attempts) and TryTimeout=10s, the maximum
	// time is roughly 4*10s + delays(~7s) = 47s. We use 60s to provide a reasonable
	// buffer while still failing faster than indefinite hangs.
	DefaultAzureOpTimeout = 60 * time.Second
)
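
// Usage sketch (illustrative): per the comment above, callers are expected to
// bound each Azure call with DefaultAzureOpTimeout so the context outlives all
// SDK retries. Assuming blobClient is a *blob.Client:
//
//	ctx, cancel := context.WithTimeout(context.Background(), DefaultAzureOpTimeout)
//	defer cancel()
//	_, err := blobClient.GetProperties(ctx, nil)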

// DefaultAzBlobClientOptions returns the default Azure blob client options
// with consistent retry configuration across the application.
// This centralizes the retry policy to ensure uniform behavior between
// remote storage and replication sink implementations.
//
// Related: Use DefaultAzureOpTimeout for context.WithTimeout when calling Azure operations
// to ensure the timeout accommodates all retry attempts configured here.
func DefaultAzBlobClientOptions() *azblob.ClientOptions {
	return &azblob.ClientOptions{
		ClientOptions: azcore.ClientOptions{
			Retry: policy.RetryOptions{
				MaxRetries:    3,                // reasonable retry count - aggressive retries mask configuration errors
				TryTimeout:    10 * time.Second, // reduced from 1 minute to fail faster on auth issues
				RetryDelay:    1 * time.Second,
				MaxRetryDelay: 10 * time.Second,
			},
		},
	}
}

// invalidMetadataChars matches any character that is not valid in Azure metadata keys.
// Azure metadata keys must be valid C# identifiers: letters, digits, and underscores only.
var invalidMetadataChars = regexp.MustCompile(`[^a-zA-Z0-9_]`)

// sanitizeMetadataKey converts an S3 metadata key to a valid Azure metadata key.
// Azure metadata keys must be valid C# identifiers (letters, digits, underscores only, cannot start with a digit).
// To prevent collisions, invalid characters are replaced with their hex representation (_XX_).
// Examples:
// - "my-key" -> "my_2d_key"
// - "my.key" -> "my_2e_key"
// - "key@value" -> "key_40_value"
func sanitizeMetadataKey(key string) string {
	// Replace each invalid character with _XX_ where XX is the hex code
	result := invalidMetadataChars.ReplaceAllStringFunc(key, func(s string) string {
		return fmt.Sprintf("_%02x_", s[0])
	})

	// Azure metadata keys cannot start with a digit
	if len(result) > 0 && result[0] >= '0' && result[0] <= '9' {
		result = "_" + result
	}

	return result
}

func init() {
	remote_storage.RemoteStorageClientMakers["azure"] = new(azureRemoteStorageMaker)
}

type azureRemoteStorageMaker struct{}

func (s azureRemoteStorageMaker) HasBucket() bool {
	return true
}
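
// Make creates an Azure remote storage client from conf, falling back to the
// AZURE_STORAGE_ACCOUNT and AZURE_STORAGE_ACCESS_KEY environment variables
// when the account name or key is not configured.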
func (s azureRemoteStorageMaker) Make(conf *remote_pb.RemoteConf) (remote_storage.RemoteStorageClient, error) {

	client := &azureRemoteStorageClient{
		conf: conf,
	}

	accountName, accountKey := conf.AzureAccountName, conf.AzureAccountKey
	if len(accountName) == 0 || len(accountKey) == 0 {
		accountName, accountKey = os.Getenv("AZURE_STORAGE_ACCOUNT"), os.Getenv("AZURE_STORAGE_ACCESS_KEY")
		if len(accountName) == 0 || len(accountKey) == 0 {
			return nil, fmt.Errorf("either AZURE_STORAGE_ACCOUNT or AZURE_STORAGE_ACCESS_KEY environment variable is not set")
		}
	}

	// Create credential and client
	credential, err := azblob.NewSharedKeyCredential(accountName, accountKey)
	if err != nil {
		return nil, fmt.Errorf("invalid Azure credential with account name:%s: %w", accountName, err)
	}

	serviceURL := fmt.Sprintf("https://%s.blob.core.windows.net/", accountName)
	azClient, err := azblob.NewClientWithSharedKeyCredential(serviceURL, credential, DefaultAzBlobClientOptions())
	if err != nil {
		return nil, fmt.Errorf("failed to create Azure client: %w", err)
	}

	client.client = azClient

	return client, nil
}
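
// Usage sketch (illustrative): callers resolve the maker registered in init
// above by storage type and build a client from a *remote_pb.RemoteConf:
//
//	maker := remote_storage.RemoteStorageClientMakers["azure"]
//	client, err := maker.Make(conf)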

type azureRemoteStorageClient struct {
	conf   *remote_pb.RemoteConf
	client *azblob.Client
}

var _ = remote_storage.RemoteStorageClient(&azureRemoteStorageClient{})
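
// Traverse lists every blob under loc.Path in container loc.Bucket and calls
// visitFn once per blob with its directory, name, and remote entry metadata.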
func (az *azureRemoteStorageClient) Traverse(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) (err error) {

	pathKey := loc.Path[1:]
	containerClient := az.client.ServiceClient().NewContainerClient(loc.Bucket)

	// List blobs with pager
	pager := containerClient.NewListBlobsFlatPager(&container.ListBlobsFlatOptions{
		Prefix: &pathKey,
	})

	for pager.More() {
		resp, err := pager.NextPage(context.Background())
		if err != nil {
			return fmt.Errorf("azure traverse %s%s: %w", loc.Bucket, loc.Path, err)
		}

		for _, blobItem := range resp.Segment.BlobItems {
			if blobItem.Name == nil {
				continue
			}
			key := "/" + *blobItem.Name
			dir, name := util.FullPath(key).DirAndName()

			remoteEntry := &filer_pb.RemoteEntry{
				StorageName: az.conf.Name,
			}
			if blobItem.Properties != nil {
				if blobItem.Properties.LastModified != nil {
					remoteEntry.RemoteMtime = blobItem.Properties.LastModified.Unix()
				}
				if blobItem.Properties.ContentLength != nil {
					remoteEntry.RemoteSize = *blobItem.Properties.ContentLength
				}
				if blobItem.Properties.ETag != nil {
					remoteEntry.RemoteETag = string(*blobItem.Properties.ETag)
				}
			}

			err = visitFn(dir, name, false, remoteEntry)
			if err != nil {
				return fmt.Errorf("azure processing %s%s: %w", loc.Bucket, loc.Path, err)
			}
		}
	}

	return
}
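
// ReadFile downloads size bytes of the blob at loc starting at offset; a size
// of 0 reads from offset to the end of the blob.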
func (az *azureRemoteStorageClient) ReadFile(loc *remote_pb.RemoteStorageLocation, offset int64, size int64) (data []byte, err error) {

	key := loc.Path[1:]
	blobClient := az.client.ServiceClient().NewContainerClient(loc.Bucket).NewBlockBlobClient(key)

	count := size
	if count == 0 {
		count = blob.CountToEnd
	}
	downloadResp, err := blobClient.DownloadStream(context.Background(), &blob.DownloadStreamOptions{
		Range: blob.HTTPRange{
			Offset: offset,
			Count:  count,
		},
	})
	if err != nil {
		return nil, fmt.Errorf("failed to download file %s%s: %w", loc.Bucket, loc.Path, err)
	}
	defer downloadResp.Body.Close()

	data, err = io.ReadAll(downloadResp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read download stream %s%s: %w", loc.Bucket, loc.Path, err)
	}

	return
}
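
// WriteDirectory is a no-op: Azure blob storage uses a flat namespace, so
// directories exist only implicitly as blob name prefixes.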
func (az *azureRemoteStorageClient) WriteDirectory(loc *remote_pb.RemoteStorageLocation, entry *filer_pb.Entry) (err error) {
	return nil
}
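
// RemoveDirectory is likewise a no-op; there is no directory object to delete.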
func (az *azureRemoteStorageClient) RemoveDirectory(loc *remote_pb.RemoteStorageLocation) (err error) {
	return nil
}
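
// WriteFile uploads the entry's content from reader as a block blob, carrying
// over S3 user metadata and the MIME type, then reads the stored blob's
// properties back into a remote entry.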
func (az *azureRemoteStorageClient) WriteFile(loc *remote_pb.RemoteStorageLocation, entry *filer_pb.Entry, reader io.Reader) (remoteEntry *filer_pb.RemoteEntry, err error) {

	key := loc.Path[1:]
	blobClient := az.client.ServiceClient().NewContainerClient(loc.Bucket).NewBlockBlobClient(key)

	// Upload from reader
	metadata := toMetadata(entry.Extended)
	httpHeaders := &blob.HTTPHeaders{}
	if entry.Attributes != nil && entry.Attributes.Mime != "" {
		httpHeaders.BlobContentType = &entry.Attributes.Mime
	}

	_, err = blobClient.UploadStream(context.Background(), reader, &blockblob.UploadStreamOptions{
		BlockSize:   defaultBlockSize,
		Concurrency: defaultConcurrency,
		HTTPHeaders: httpHeaders,
		Metadata:    metadata,
	})
	if err != nil {
		return nil, fmt.Errorf("azure upload to %s%s: %w", loc.Bucket, loc.Path, err)
	}

	// read back the remote entry
	return az.readFileRemoteEntry(loc)
}
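
// readFileRemoteEntry fetches the blob's properties and converts them into a
// filer_pb.RemoteEntry (mtime, size, ETag).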
func (az *azureRemoteStorageClient) readFileRemoteEntry(loc *remote_pb.RemoteStorageLocation) (*filer_pb.RemoteEntry, error) {
	key := loc.Path[1:]
	blobClient := az.client.ServiceClient().NewContainerClient(loc.Bucket).NewBlockBlobClient(key)

	props, err := blobClient.GetProperties(context.Background(), nil)
	if err != nil {
		return nil, err
	}

	remoteEntry := &filer_pb.RemoteEntry{
		StorageName: az.conf.Name,
	}

	if props.LastModified != nil {
		remoteEntry.RemoteMtime = props.LastModified.Unix()
	}
	if props.ContentLength != nil {
		remoteEntry.RemoteSize = *props.ContentLength
	}
	if props.ETag != nil {
		remoteEntry.RemoteETag = string(*props.ETag)
	}

	return remoteEntry, nil
}
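
// toMetadata converts S3-style extended attributes (x-amz-meta-*) into Azure
// blob metadata, lowercasing and sanitizing each key.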
func toMetadata(attributes map[string][]byte) map[string]*string {
	metadata := make(map[string]*string)
	for k, v := range attributes {
		if strings.HasPrefix(k, s3_constants.AmzUserMetaPrefix) {
			// S3 stores metadata keys in lowercase; normalize for consistency.
			key := strings.ToLower(k[len(s3_constants.AmzUserMetaPrefix):])

			// Sanitize key to prevent collisions and ensure Azure compliance
			key = sanitizeMetadataKey(key)

			val := string(v)
			metadata[key] = &val
		}
	}
	return metadata
}
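
// UpdateFileMetadata rewrites the blob's metadata when the entry's extended
// attributes have changed; if they are unchanged it does nothing.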
func (az *azureRemoteStorageClient) UpdateFileMetadata(loc *remote_pb.RemoteStorageLocation, oldEntry *filer_pb.Entry, newEntry *filer_pb.Entry) (err error) {
	if reflect.DeepEqual(oldEntry.Extended, newEntry.Extended) {
		return nil
	}
	metadata := toMetadata(newEntry.Extended)

	key := loc.Path[1:]
	blobClient := az.client.ServiceClient().NewContainerClient(loc.Bucket).NewBlobClient(key)

	_, err = blobClient.SetMetadata(context.Background(), metadata, nil)

	return
}
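
// DeleteFile removes the blob together with any snapshots; a missing blob is
// treated as success so deletes are idempotent.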
func (az *azureRemoteStorageClient) DeleteFile(loc *remote_pb.RemoteStorageLocation) (err error) {
	key := loc.Path[1:]
	blobClient := az.client.ServiceClient().NewContainerClient(loc.Bucket).NewBlobClient(key)

	_, err = blobClient.Delete(context.Background(), &blob.DeleteOptions{
		DeleteSnapshots: to.Ptr(blob.DeleteSnapshotsOptionTypeInclude),
	})
	if err != nil {
		// Make delete idempotent - don't return error if blob doesn't exist
		if bloberror.HasCode(err, bloberror.BlobNotFound) {
			return nil
		}
		return fmt.Errorf("azure delete %s%s: %w", loc.Bucket, loc.Path, err)
	}
	return
}
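
// ListBuckets lists all containers in the storage account as buckets.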
func (az *azureRemoteStorageClient) ListBuckets() (buckets []*remote_storage.Bucket, err error) {
	pager := az.client.NewListContainersPager(nil)

	for pager.More() {
		resp, err := pager.NextPage(context.Background())
		if err != nil {
			return buckets, err
		}

		for _, containerItem := range resp.ContainerItems {
			if containerItem.Name != nil {
				bucket := &remote_storage.Bucket{
					Name: *containerItem.Name,
				}
				if containerItem.Properties != nil && containerItem.Properties.LastModified != nil {
					bucket.CreatedAt = *containerItem.Properties.LastModified
				}
				buckets = append(buckets, bucket)
			}
		}
	}
	return
}
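
// CreateBucket creates the named container.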
func (az *azureRemoteStorageClient) CreateBucket(name string) (err error) {
	containerClient := az.client.ServiceClient().NewContainerClient(name)
	_, err = containerClient.Create(context.Background(), nil)
	if err != nil {
		return fmt.Errorf("create bucket %s: %w", name, err)
	}
	return
}
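
// DeleteBucket deletes the named container.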
func (az *azureRemoteStorageClient) DeleteBucket(name string) (err error) {
	containerClient := az.client.ServiceClient().NewContainerClient(name)
	_, err = containerClient.Delete(context.Background(), nil)
	if err != nil {
		return fmt.Errorf("delete bucket %s: %w", name, err)
	}
	return
}