// seaweedfs/weed/storage/volume.go

package storage

import (
	"fmt"
	"path"
	"strconv"
	"sync"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/pb/master_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
	"github.com/seaweedfs/seaweedfs/weed/stats"
	"github.com/seaweedfs/seaweedfs/weed/storage/backend"
	"github.com/seaweedfs/seaweedfs/weed/storage/needle"
	"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
	"github.com/seaweedfs/seaweedfs/weed/storage/types"

	"github.com/seaweedfs/seaweedfs/weed/glog"
)
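
// Volume wraps a single data file (.dat) and its needle map index, together
// with the locks, TTL/replication settings, and bookkeeping the volume server
// needs to read, write, compact, and report on it.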
type Volume struct {
	Id needle.VolumeId
	dir string
	dirIdx string
	Collection string
	DataBackend backend.BackendStorageFile
	nm NeedleMapper
	tmpNm TempNeedleMapper
	needleMapKind NeedleMapKind
	noWriteOrDelete bool // if readonly, either noWriteOrDelete or noWriteCanDelete
	noWriteCanDelete bool // if readonly, either noWriteOrDelete or noWriteCanDelete
	noWriteLock sync.RWMutex
	hasRemoteFile bool // if the volume has a remote file
	MemoryMapMaxSizeMb uint32

	super_block.SuperBlock

	dataFileAccessLock sync.RWMutex
	superBlockAccessLock sync.Mutex
	asyncRequestsChan chan *needle.AsyncRequest

	lastModifiedTsSeconds uint64 // unix time in seconds
	lastAppendAtNs uint64 // unix time in nanoseconds

	lastCompactIndexOffset uint64
	lastCompactRevision uint16

	ldbTimeout int64

	isCompacting bool
	isCommitCompacting bool

	volumeInfoRWLock sync.RWMutex
	volumeInfo *volume_server_pb.VolumeInfo
	location *DiskLocation
	diskId uint32 // ID of this volume's disk in Store.Locations array

	lastIoError error
}
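
// NewVolume opens (or creates) the volume files under dirname/dirIdx for the
// given id, loads the superblock and needle map, and starts the background
// append worker.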
func NewVolume(dirname string, dirIdx string, collection string, id needle.VolumeId, needleMapKind NeedleMapKind, replicaPlacement *super_block.ReplicaPlacement, ttl *needle.TTL, preallocate int64, ver needle.Version, memoryMapMaxSizeMb uint32, ldbTimeout int64) (v *Volume, e error) {
	// if replicaPlacement is nil, the superblock will be loaded from disk
	v = &Volume{dir: dirname, dirIdx: dirIdx, Collection: collection, Id: id, MemoryMapMaxSizeMb: memoryMapMaxSizeMb,
		asyncRequestsChan: make(chan *needle.AsyncRequest, 128)}
	v.SuperBlock = super_block.SuperBlock{ReplicaPlacement: replicaPlacement, Ttl: ttl}
	v.needleMapKind = needleMapKind
	v.ldbTimeout = ldbTimeout
	e = v.load(true, true, needleMapKind, preallocate, ver)
	v.startWorker()
	return
}

func (v *Volume) String() string {
	v.noWriteLock.RLock()
	defer v.noWriteLock.RUnlock()
	return fmt.Sprintf("Id:%v dir:%s dirIdx:%s Collection:%s dataFile:%v nm:%v noWrite:%v canDelete:%v", v.Id, v.dir, v.dirIdx, v.Collection, v.DataBackend, v.nm, v.noWriteOrDelete || v.noWriteCanDelete, v.noWriteCanDelete)
}
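
// VolumeFileName returns the base file path (without extension) for a volume:
// <dir>/<id> for the default collection, or <dir>/<collection>_<id> otherwise.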
func VolumeFileName(dir string, collection string, id int) (fileName string) {
	idString := strconv.Itoa(id)
	if collection == "" {
		fileName = path.Join(dir, idString)
	} else {
		fileName = path.Join(dir, collection+"_"+idString)
	}
	return
}

func (v *Volume) DataFileName() (fileName string) {
	return VolumeFileName(v.dir, v.Collection, int(v.Id))
}

func (v *Volume) IndexFileName() (fileName string) {
	return VolumeFileName(v.dirIdx, v.Collection, int(v.Id))
}
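
// FileName returns the full path for the given extension: index-related files
// (.idx, .cpx, .ldb, .cpldb) live under dirIdx, everything else (.dat, .cpd,
// .vif) under dir.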
func (v *Volume) FileName(ext string) (fileName string) {
	switch ext {
	case ".idx", ".cpx", ".ldb", ".cpldb":
		return VolumeFileName(v.dirIdx, v.Collection, int(v.Id)) + ext
	}
	// .dat, .cpd, .vif
	return VolumeFileName(v.dir, v.Collection, int(v.Id)) + ext
}

func (v *Volume) Version() needle.Version {
	v.superBlockAccessLock.Lock()
	defer v.superBlockAccessLock.Unlock()
	if v.volumeInfo.Version != 0 {
		v.SuperBlock.Version = needle.Version(v.volumeInfo.Version)
	}
	return v.SuperBlock.Version
}
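
// FileStat reports the data file size, the index file size, and the data
// file's modification time; it returns zero values when the volume's backend
// is not open.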
func (v *Volume) FileStat() (datSize uint64, idxSize uint64, modTime time.Time) {
	v.dataFileAccessLock.RLock()
	defer v.dataFileAccessLock.RUnlock()

	if v.DataBackend == nil {
		return
	}

	datFileSize, modTime, e := v.DataBackend.GetStat()
	if e == nil {
		return uint64(datFileSize), v.nm.IndexFileSize(), modTime
	}
	glog.V(0).Infof("Failed to read file size %s %v", v.DataBackend.Name(), e)
	return // -1 causes integer overflow and the volume to become unwritable.
}

func (v *Volume) ContentSize() uint64 {
	v.dataFileAccessLock.RLock()
	defer v.dataFileAccessLock.RUnlock()
	if v.nm == nil {
		return 0
	}
	return v.nm.ContentSize()
}
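
// doIsEmpty reports whether the volume holds no needles: the data file is no
// larger than the superblock and the needle map records zero content size.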
func (v *Volume) doIsEmpty() (bool, error) {
	// check v.DataBackend.GetStat()
	if v.DataBackend == nil {
		return false, fmt.Errorf("v.DataBackend is nil")
	} else {
		datFileSize, _, e := v.DataBackend.GetStat()
		if e != nil {
			glog.V(0).Infof("Failed to read file size %s %v", v.DataBackend.Name(), e)
			return false, fmt.Errorf("v.DataBackend.GetStat(): %v", e)
		}
		if datFileSize > super_block.SuperBlockSize {
			return false, nil
		}
	}
	// check v.nm.ContentSize()
	if v.nm != nil {
		if v.nm.ContentSize() > 0 {
			return false, nil
		}
	}
	return true, nil
}

func (v *Volume) DeletedSize() uint64 {
	v.dataFileAccessLock.RLock()
	defer v.dataFileAccessLock.RUnlock()
	if v.nm == nil {
		return 0
	}
	return v.nm.DeletedSize()
}

func (v *Volume) FileCount() uint64 {
	v.dataFileAccessLock.RLock()
	defer v.dataFileAccessLock.RUnlock()
	if v.nm == nil {
		return 0
	}
	return uint64(v.nm.FileCount())
}

func (v *Volume) DeletedCount() uint64 {
	v.dataFileAccessLock.RLock()
	defer v.dataFileAccessLock.RUnlock()
	if v.nm == nil {
		return 0
	}
	return uint64(v.nm.DeletedCount())
}

func (v *Volume) MaxFileKey() types.NeedleId {
	v.dataFileAccessLock.RLock()
	defer v.dataFileAccessLock.RUnlock()
	if v.nm == nil {
		return 0
	}
	return v.nm.MaxFileKey()
}

func (v *Volume) IndexFileSize() uint64 {
	v.dataFileAccessLock.RLock()
	defer v.dataFileAccessLock.RUnlock()
	if v.nm == nil {
		return 0
	}
	return v.nm.IndexFileSize()
}

func (v *Volume) DiskType() types.DiskType {
	return v.location.DiskType
}
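
// SyncToDisk flushes both the needle map index and the data backend to stable
// storage.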
func (v *Volume) SyncToDisk() {
	v.dataFileAccessLock.Lock()
	defer v.dataFileAccessLock.Unlock()
	if v.nm != nil {
		if err := v.nm.Sync(); err != nil {
			glog.Warningf("Volume SyncToDisk fail to sync volume idx %d", v.Id)
		}
	}
	if v.DataBackend != nil {
		if err := v.DataBackend.Sync(); err != nil {
			glog.Warningf("Volume SyncToDisk fail to sync volume %d", v.Id)
		}
	}
}

// Close cleanly shuts down this volume
func (v *Volume) Close() {
	v.dataFileAccessLock.Lock()
	defer v.dataFileAccessLock.Unlock()
	v.doClose()
}
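
// doClose waits for any in-flight commit compaction, then syncs and releases
// the needle map and the data backend; it runs with dataFileAccessLock already
// held (see Close).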
func (v *Volume) doClose() {
	for v.isCommitCompacting {
		time.Sleep(521 * time.Millisecond)
		glog.Warningf("Volume Close wait for compaction %d", v.Id)
	}
	if v.nm != nil {
		if err := v.nm.Sync(); err != nil {
			glog.Warningf("Volume Close fail to sync volume idx %d", v.Id)
		}
		v.nm.Close()
		v.nm = nil
	}
	if v.DataBackend != nil {
		if err := v.DataBackend.Close(); err != nil {
			glog.Warningf("Volume Close fail to close volume %d", v.Id)
		}
		v.DataBackend = nil
		stats.VolumeServerVolumeGauge.WithLabelValues(v.Collection, "volume").Dec()
	}
}

func (v *Volume) NeedToReplicate() bool {
	return v.ReplicaPlacement.GetCopyCount() > 1
}

// A volume is expired when lastModified + TTL < now, except when the volume is
// empty, has no TTL, or volumeSizeLimit is 0 (the server has just started and
// does not yet know the size limit).
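// For example, a volume with a 60-minute TTL whose content was last modified
// 90 minutes ago is reported as expired.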
func (v *Volume) expired(contentSize uint64, volumeSizeLimit uint64) bool {
	if volumeSizeLimit == 0 {
		// skip if we don't know size limit
		return false
	}
	if contentSize <= super_block.SuperBlockSize {
		return false
	}
	if v.Ttl == nil || v.Ttl.Minutes() == 0 {
		return false
	}
	glog.V(2).Infof("volume %d now:%v lastModified:%v", v.Id, time.Now().Unix(), v.lastModifiedTsSeconds)
	livedMinutes := (time.Now().Unix() - int64(v.lastModifiedTsSeconds)) / 60
	glog.V(2).Infof("volume %d ttl:%v lived:%v", v.Id, v.Ttl, livedMinutes)
	if int64(v.Ttl.Minutes()) < livedMinutes {
		return true
	}
	return false
}

// expiredLongEnough reports whether the volume has stayed expired past a grace
// period of either 10% of the TTL or maxDelayMinutes, whichever is smaller.
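// For example, with a 24-hour TTL and maxDelayMinutes of 30, the 144-minute
// delay (10% of the TTL) is capped at 30 minutes after expiry.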
func (v *Volume) expiredLongEnough(maxDelayMinutes uint32) bool {
	if v.Ttl == nil || v.Ttl.Minutes() == 0 {
		return false
	}

	removalDelay := v.Ttl.Minutes() / 10
	if removalDelay > maxDelayMinutes {
		removalDelay = maxDelayMinutes
	}

	if uint64(v.Ttl.Minutes()+removalDelay)*60+v.lastModifiedTsSeconds < uint64(time.Now().Unix()) {
		return true
	}
	return false
}
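
// collectStatus snapshots, under a read lock, the counters that go into the
// master heartbeat; ok is false when the volume is not fully open.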
func (v *Volume) collectStatus() (maxFileKey types.NeedleId, datFileSize int64, modTime time.Time, fileCount, deletedCount, deletedSize uint64, ok bool) {
	v.dataFileAccessLock.RLock()
	defer v.dataFileAccessLock.RUnlock()
	glog.V(4).Infof("collectStatus volume %d", v.Id)

	if v.nm == nil || v.DataBackend == nil {
		return
	}

	ok = true

	maxFileKey = v.nm.MaxFileKey()
	datFileSize, modTime, _ = v.DataBackend.GetStat()
	fileCount = uint64(v.nm.FileCount())
	deletedCount = uint64(v.nm.DeletedCount())
	deletedSize = v.nm.DeletedSize()

	return
}
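
// ToVolumeInformationMessage builds the VolumeInformationMessage reported to
// the master, returning it together with the volume's max file key; it returns
// (0, nil) when the volume is not fully open.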
func (v *Volume) ToVolumeInformationMessage() (types.NeedleId, *master_pb.VolumeInformationMessage) {
	maxFileKey, volumeSize, modTime, fileCount, deletedCount, deletedSize, ok := v.collectStatus()

	if !ok {
		return 0, nil
	}

	volumeInfo := &master_pb.VolumeInformationMessage{
		Id: uint32(v.Id),
		Size: uint64(volumeSize),
		Collection: v.Collection,
		FileCount: fileCount,
		DeleteCount: deletedCount,
		DeletedByteCount: deletedSize,
		ReadOnly: v.IsReadOnly(),
		ReplicaPlacement: uint32(v.ReplicaPlacement.Byte()),
		Version: uint32(v.Version()),
		Ttl: v.Ttl.ToUint32(),
		CompactRevision: uint32(v.SuperBlock.CompactionRevision),
		ModifiedAtSecond: modTime.Unix(),
		DiskType: string(v.location.DiskType),
		DiskId: v.diskId,
	}

	volumeInfo.RemoteStorageName, volumeInfo.RemoteStorageKey = v.RemoteStorageNameKey()

	return maxFileKey, volumeInfo
}
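
// RemoteStorageNameKey returns the backend name and key of the first remote
// file recorded in volumeInfo, or empty strings when the volume has no remote
// copy.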
func (v *Volume) RemoteStorageNameKey() (storageName, storageKey string) {
	if v.volumeInfo == nil {
		return
	}
	if len(v.volumeInfo.GetFiles()) == 0 {
		return
	}
	return v.volumeInfo.GetFiles()[0].BackendName(), v.volumeInfo.GetFiles()[0].GetKey()
}
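
// IsReadOnly reports whether writes are currently rejected, either because the
// volume itself is marked read-only or because its disk is low on free space.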
func (v *Volume) IsReadOnly() bool {
	v.noWriteLock.RLock()
	defer v.noWriteLock.RUnlock()
	return v.noWriteOrDelete || v.noWriteCanDelete || v.location.isDiskSpaceLow
}
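
// PersistReadOnly records the read-only flag in the volume info and persists
// it via SaveVolumeInfo().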
func (v *Volume) PersistReadOnly(readOnly bool) {
	v.volumeInfoRWLock.Lock()
	defer v.volumeInfoRWLock.Unlock()
	v.volumeInfo.ReadOnly = readOnly
	v.SaveVolumeInfo()
}