optimization for reading whole chunk with gzip encoding
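This commit adds an isFullChunk flag that is threaded from the visible-interval computation through ChunkView into the HTTP read path, in both the Java client and the Go code. When a read covers a stored chunk in its entirety, the client now omits the Range header so the volume server may answer with a gzip-compressed body, which the reader decompresses transparently; partial reads keep the Range header and explicitly disable content encoding.

Java client (class SeaweedRead):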
@@ -56,9 +56,12 @@ public class SeaweedRead {
         HttpClient client = HttpClientBuilder.create().build();
         HttpGet request = new HttpGet(
                 String.format("http://%s/%s", locations.getLocations(0).getUrl(), chunkView.fileId));
-        request.setHeader(HttpHeaders.ACCEPT_ENCODING, "");
-        request.setHeader(HttpHeaders.RANGE,
-                String.format("bytes=%d-%d", chunkView.offset, chunkView.offset + chunkView.size));
+
+        if (!chunkView.isFullChunk){
+            request.setHeader(HttpHeaders.ACCEPT_ENCODING, "");
+            request.setHeader(HttpHeaders.RANGE,
+                    String.format("bytes=%d-%d", chunkView.offset, chunkView.offset + chunkView.size));
+        }
 
         try {
             HttpResponse response = client.execute(request);
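With both headers now set only for partial reads, a full-chunk request leaves encoding negotiation to the HTTP client's defaults, so the server may answer with the complete chunk gzip-compressed; partial reads still pin an exact byte range and an empty ACCEPT_ENCODING.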
@@ -86,11 +89,13 @@ public class SeaweedRead {
         long stop = offset + size;
         for (VisibleInterval chunk : visibleIntervals) {
             if (chunk.start <= offset && offset < chunk.stop && offset < stop) {
+                boolean isFullChunk = chunk.isFullChunk && chunk.start == offset && chunk.stop <= stop;
                 views.add(new ChunkView(
                         chunk.fileId,
                         offset - chunk.start,
                         Math.min(chunk.stop, stop) - offset,
-                        offset
+                        offset,
+                        isFullChunk
                 ));
                 offset = Math.min(chunk.stop, stop);
             }
@@ -123,7 +128,8 @@ public class SeaweedRead {
                 chunk.getOffset(),
                 chunk.getOffset() + chunk.getSize(),
                 chunk.getFileId(),
-                chunk.getMtime()
+                chunk.getMtime(),
+                true
         );

         // easy cases to speed up
@@ -142,7 +148,8 @@ public class SeaweedRead {
                     v.start,
                     chunk.getOffset(),
                     v.fileId,
-                    v.modifiedTime
+                    v.modifiedTime,
+                    false
             ));
         }
         long chunkStop = chunk.getOffset() + chunk.getSize();
@@ -151,7 +158,8 @@ public class SeaweedRead {
                     chunkStop,
                     v.stop,
                     v.fileId,
-                    v.modifiedTime
+                    v.modifiedTime,
+                    false
             ));
         }
         if (chunkStop <= v.start || v.stop <= chunk.getOffset()) {
@@ -197,21 +205,24 @@ public class SeaweedRead {
         public final long stop;
         public final long modifiedTime;
         public final String fileId;
+        public final boolean isFullChunk;

-        public VisibleInterval(long start, long stop, String fileId, long modifiedTime) {
+        public VisibleInterval(long start, long stop, String fileId, long modifiedTime, boolean isFullChunk) {
             this.start = start;
             this.stop = stop;
             this.modifiedTime = modifiedTime;
             this.fileId = fileId;
+            this.isFullChunk = isFullChunk;
         }

         @Override
         public String toString() {
-            return "VisibleIntervalq{" +
+            return "VisibleInterval{" +
                     "start=" + start +
                     ", stop=" + stop +
                     ", modifiedTime=" + modifiedTime +
                     ", fileId='" + fileId + '\'' +
+                    ", isFullChunk=" + isFullChunk +
                     '}';
         }
     }
@@ -221,12 +232,14 @@ public class SeaweedRead {
         public final long offset;
         public final long size;
         public final long logicOffset;
+        public final boolean isFullChunk;

-        public ChunkView(String fileId, long offset, long size, long logicOffset) {
+        public ChunkView(String fileId, long offset, long size, long logicOffset, boolean isFullChunk) {
             this.fileId = fileId;
             this.offset = offset;
             this.size = size;
             this.logicOffset = logicOffset;
+            this.isFullChunk = isFullChunk;
         }

         @Override
@@ -236,6 +249,7 @@ public class SeaweedRead {
                     ", offset=" + offset +
                     ", size=" + size +
                     ", logicOffset=" + logicOffset +
+                    ", isFullChunk=" + isFullChunk +
                     '}';
         }
     }
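Go filer chunk logic (filer2 package, ChunkView and visibleInterval):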
@@ -70,6 +70,7 @@ type ChunkView struct {
    Offset      int64
    Size        uint64
    LogicOffset int64
+   IsFullChunk bool
 }

 func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views []*ChunkView) {
@@ -80,11 +81,13 @@ func ViewFromChunks(chunks []*filer_pb.FileChunk, offset int64, size int) (views

    for _, chunk := range visibles {
        if chunk.start <= offset && offset < chunk.stop && offset < stop {
+           isFullChunk := chunk.isFullChunk && chunk.start == offset && chunk.stop <= stop
            views = append(views, &ChunkView{
                FileId:      chunk.fileId,
                Offset:      offset - chunk.start, // offset is the data starting location in this file id
                Size:        uint64(min(chunk.stop, stop) - offset),
                LogicOffset: offset,
+               IsFullChunk: isFullChunk,
            })
            offset = min(chunk.stop, stop)
        }
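The flag reaches a ChunkView only when the requested window lines up with a stored full chunk. A minimal standalone sketch of that predicate (illustrative names, not the actual SeaweedFS types):

package main

import "fmt"

// interval mirrors just the visibleInterval fields that matter here.
type interval struct {
    start, stop int64
    isFullChunk bool
}

// fullChunkRead reproduces the condition from ViewFromChunks: the chunk
// must be stored whole, the read must begin exactly at the chunk's start,
// and the read must extend at least to the chunk's stop offset.
func fullChunkRead(c interval, offset, stop int64) bool {
    return c.isFullChunk && c.start == offset && c.stop <= stop
}

func main() {
    c := interval{start: 0, stop: 1024, isFullChunk: true}
    fmt.Println(fullChunkRead(c, 0, 1024))   // true: read matches the chunk exactly
    fmt.Println(fullChunkRead(c, 0, 4096))   // true: read extends past the chunk
    fmt.Println(fullChunkRead(c, 100, 1024)) // false: read starts mid-chunk
}

Any interval that was truncated during merging (the false arguments passed to newVisibleInterval in the hunks below) can never satisfy the predicate, which is the intent: only bytes that map one-to-one onto a stored chunk may skip the range request.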
@@ -116,6 +119,7 @@ func mergeIntoVisibles(visibles, newVisibles []*visibleInterval, chunk *filer_pb
    chunk.Offset+int64(chunk.Size),
    chunk.FileId,
    chunk.Mtime,
+   true,
 )

 length := len(visibles)
@@ -135,6 +139,7 @@ func mergeIntoVisibles(visibles, newVisibles []*visibleInterval, chunk *filer_pb
            chunk.Offset,
            v.fileId,
            v.modifiedTime,
+           false,
        ))
    }
    chunkStop := chunk.Offset + int64(chunk.Size)
@@ -144,6 +149,7 @@ func mergeIntoVisibles(visibles, newVisibles []*visibleInterval, chunk *filer_pb
            v.stop,
            v.fileId,
            v.modifiedTime,
+           false,
        ))
    }
    if chunkStop <= v.start || v.stop <= chunk.Offset {
@@ -195,14 +201,16 @@ type visibleInterval struct {
    stop         int64
    modifiedTime int64
    fileId       string
+   isFullChunk  bool
 }

-func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64) *visibleInterval {
+func newVisibleInterval(start, stop int64, fileId string, modifiedTime int64, isFullChunk bool) *visibleInterval {
    return &visibleInterval{
        start:        start,
        stop:         stop,
        fileId:       fileId,
        modifiedTime: modifiedTime,
+       isFullChunk:  isFullChunk,
    }
 }

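Go FUSE read path (FileHandle.Read):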
@@ -117,7 +117,8 @@ func (fh *FileHandle) Read(ctx context.Context, req *fuse.ReadRequest, resp *fus
        fmt.Sprintf("http://%s/%s", locations.Locations[0].Url, chunkView.FileId),
        chunkView.Offset,
        int(chunkView.Size),
-       buff[chunkView.LogicOffset-req.Offset:chunkView.LogicOffset-req.Offset+int64(chunkView.Size)])
+       buff[chunkView.LogicOffset-req.Offset:chunkView.LogicOffset-req.Offset+int64(chunkView.Size)],
+       !chunkView.IsFullChunk)

    if err != nil {
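Note the inversion at the call site: ReadUrl's new final parameter is isReadRange, so a view that covers a full chunk passes false and the request is sent without a Range header.

Go replication S3 sink (S3Sink.buildReadSeeker):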
@@ -161,6 +161,6 @@ func (s3sink *S3Sink) buildReadSeeker(chunk *filer2.ChunkView) (io.ReadSeeker, e
        return nil, err
    }
    buf := make([]byte, chunk.Size)
-   util.ReadUrl(fileUrl, chunk.Offset, int(chunk.Size), buf)
+   util.ReadUrl(fileUrl, chunk.Offset, int(chunk.Size), buf, true)
    return bytes.NewReader(buf), nil
 }
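The S3 sink always passes true, so it keeps issuing ranged reads and never takes the gzip path.

Go util package (ReadUrl):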
@@ -2,6 +2,7 @@ package util

 import (
    "bytes"
+   "compress/gzip"
    "encoding/json"
    "errors"
    "fmt"
@@ -184,24 +185,38 @@ func NormalizeUrl(url string) string {
    return "http://" + url
 }

-func ReadUrl(fileUrl string, offset int64, size int, buf []byte) (n int64, e error) {
+func ReadUrl(fileUrl string, offset int64, size int, buf []byte, isReadRange bool) (n int64, e error) {

    req, _ := http.NewRequest("GET", fileUrl, nil)
-   req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", offset, offset+int64(size)))
+   if isReadRange {
+       req.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", offset, offset+int64(size)))
+   } else {
+       req.Header.Set("Accept-Encoding", "gzip")
+   }

    r, err := client.Do(req)
    if err != nil {
        return 0, err
    }

    defer r.Body.Close()
    if r.StatusCode >= 400 {
        return 0, fmt.Errorf("%s: %s", fileUrl, r.Status)
    }

+   var reader io.ReadCloser
+   switch r.Header.Get("Content-Encoding") {
+   case "gzip":
+       reader, err = gzip.NewReader(r.Body)
+       defer reader.Close()
+   default:
+       reader = r.Body
+   }
+
    var i, m int

    for {
-       m, err = r.Body.Read(buf[i:])
+       m, err = reader.Read(buf[i:])
        if m == 0 {
            return
        }
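With isReadRange set to false, the request goes out with Accept-Encoding: gzip and no Range header, and the Content-Encoding switch wraps the body in a gzip reader when the server answered compressed. A self-contained sketch of the same decode pattern against a stand-in server (hypothetical handler and payload, not SeaweedFS code; unlike the diff above it also checks gzip.NewReader's error before deferring Close):

package main

import (
    "compress/gzip"
    "fmt"
    "io"
    "net/http"
    "net/http/httptest"
)

// readAll fetches url and transparently gunzips the body when the server
// says Content-Encoding: gzip, the same switch ReadUrl uses.
func readAll(url string) ([]byte, error) {
    req, err := http.NewRequest("GET", url, nil)
    if err != nil {
        return nil, err
    }
    // Setting Accept-Encoding ourselves also stops net/http's transport
    // from decompressing behind our back.
    req.Header.Set("Accept-Encoding", "gzip")

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        return nil, err
    }
    defer resp.Body.Close()

    var reader io.ReadCloser
    switch resp.Header.Get("Content-Encoding") {
    case "gzip":
        reader, err = gzip.NewReader(resp.Body)
        if err != nil {
            return nil, err
        }
        defer reader.Close()
    default:
        reader = resp.Body
    }
    return io.ReadAll(reader)
}

func main() {
    // Stand-in for a volume server that stores the chunk gzip-compressed
    // and serves it as-is.
    srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Encoding", "gzip")
        gz := gzip.NewWriter(w)
        gz.Write([]byte("whole chunk payload"))
        gz.Close()
    }))
    defer srv.Close()

    data, err := readAll(srv.URL)
    fmt.Println(string(data), err)
}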