Feature limit caching to prescribed number of bytes per file (#6009)

* feature: we can check if a fileId is already in the cache

We use this to protect the cache from adding the same needle to
it over and over.

* fuse mount: Do not start downloader if needle is already in the cache

* added maxFilePartSizeInCache property to ChunkCache

If a file is very large, only its first maxFilePartSizeInCache bytes
are going to be put into the cache (subject to the needle size
constraints).

* feature: for large files put in cache no more than prescribed number of bytes

Before this patch, only the first needle of a large file was intended for
caching. This patch allows up to the prescribed maximum number of bytes to be
put in the cache. This makes it possible to bypass the default 2MB maximum for
a file part stored in the cache.

* added dummy mock methods to satisfy interfaces of ChunkCache
This commit is contained in:
Eugeniy E. Mikhailov
2024-09-12 00:09:20 -04:00
committed by GitHub
parent 151f2ff7a9
commit dab0bb8097
4 changed files with 61 additions and 2 deletions

View File

@@ -13,6 +13,8 @@ var ErrorOutOfBounds = errors.New("attempt to read out of bounds")
// ChunkCache is the interface for a cache of file chunks addressed by fileId.
type ChunkCache interface {
// ReadChunkAt presumably copies the cached chunk for fileId, starting at
// offset, into data and returns the number of bytes copied — confirm
// against the implementations.
ReadChunkAt(data []byte, fileId string, offset uint64) (n int, err error)
// SetChunk offers data for caching under fileId.
SetChunk(fileId string, data []byte)
// IsInCache reports whether fileId is already cached. lockNeeded tells the
// implementation whether it must take its own lock; callers that already
// hold the cache lock pass false.
IsInCache(fileId string, lockNeeded bool) (answer bool)
// GetMaxFilePartSizeInCache returns the maximum number of bytes of a single
// file that are intended to be put into the cache.
GetMaxFilePartSizeInCache() (answer uint64)
}
// a global cache for recently accessed file chunks
@@ -23,6 +25,7 @@ type TieredChunkCache struct {
onDiskCacheSizeLimit0 uint64
onDiskCacheSizeLimit1 uint64
onDiskCacheSizeLimit2 uint64
maxFilePartSizeInCache uint64
}
// Compile-time check that *TieredChunkCache implements ChunkCache.
var _ ChunkCache = &TieredChunkCache{}
@@ -39,10 +42,49 @@ func NewTieredChunkCache(maxEntries int64, dir string, diskSizeInUnit int64, uni
c.diskCaches[0] = NewOnDiskCacheLayer(dir, "c0_2", diskSizeInUnit*unitSize/8, 2)
c.diskCaches[1] = NewOnDiskCacheLayer(dir, "c1_3", diskSizeInUnit*unitSize/4+diskSizeInUnit*unitSize/8, 3)
c.diskCaches[2] = NewOnDiskCacheLayer(dir, "c2_2", diskSizeInUnit*unitSize/2, 2)
c.maxFilePartSizeInCache = uint64(unitSize*diskSizeInUnit)/4
return c
}
// GetMaxFilePartSizeInCache returns the maximum number of bytes of a single
// file that are intended to be put into the cache.
//
// A nil receiver yields 0 instead of panicking, matching the nil-tolerant
// behaviour of IsInCache and ReadChunkAt on the same type.
func (c *TieredChunkCache) GetMaxFilePartSizeInCache() (answer uint64) {
	if c == nil {
		return 0
	}
	return c.maxFilePartSizeInCache
}
// IsInCache reports whether fileId is present in the in-memory cache or in
// any volume of any on-disk cache layer.
//
// When lockNeeded is true the method takes the cache's read lock for the
// duration of the lookup. Callers that already hold the cache lock (SetChunk
// calls this while holding the write lock) must pass false.
//
// A nil receiver, or a fileId that cannot be parsed, yields false.
func (c *TieredChunkCache) IsInCache(fileId string, lockNeeded bool) (answer bool) {
	if c == nil {
		return false
	}

	if lockNeeded {
		c.RLock()
		defer c.RUnlock()
	}

	// The memory tier is keyed by the raw fileId string.
	if item := c.memCache.cache.Get(fileId); item != nil {
		glog.V(4).Infof("fileId %s is in memcache", fileId)
		return true
	}

	// The disk tiers are looked up by the needle key parsed from the fileId.
	fid, err := needle.ParseFileIdFromString(fileId)
	if err != nil {
		// Include the parse error so the failure can be diagnosed from the log.
		glog.V(4).Infof("failed to parse file id %s: %v", fileId, err)
		return false
	}

	for i, diskCacheLayer := range c.diskCaches {
		for k, v := range diskCacheLayer.diskCaches {
			if _, ok := v.nm.Get(fid.Key); ok {
				glog.V(4).Infof("fileId %s is in diskCaches[%d].volume[%d]", fileId, i, k)
				return true
			}
		}
	}
	return false
}
func (c *TieredChunkCache) ReadChunkAt(data []byte, fileId string, offset uint64) (n int, err error) {
if c == nil {
return 0, nil
@@ -99,6 +141,10 @@ func (c *TieredChunkCache) SetChunk(fileId string, data []byte) {
defer c.Unlock()
glog.V(4).Infof("SetChunk %s size %d\n", fileId, len(data))
if c.IsInCache(fileId, false) {
glog.V(4).Infof("fileId %s is already in cache", fileId)
return
}
c.doSetChunk(fileId, data)
}