From ea30571615f87ac1271a39230ea837ae2ac0d539 Mon Sep 17 00:00:00 2001 From: James Fantin-Hardesty Date: Tue, 12 Sep 2023 16:02:51 +0000 Subject: [PATCH 1/3] Merged PR 70: Write uses multipart upload/copy Write now supports using multipart upload and copies when doing writes, which is how the azstorage component does them. This can improve performance for writes on very large objects because you are not required to redownload the entire object. --- component/s3storage/client.go | 149 ++++++++++++++++++++------ component/s3storage/client_test.go | 13 ++- component/s3storage/s3storage_test.go | 124 +++++++++++++++++++++ 3 files changed, 253 insertions(+), 33 deletions(-) diff --git a/component/s3storage/client.go b/component/s3storage/client.go index adfca9089..47cfdf601 100644 --- a/component/s3storage/client.go +++ b/component/s3storage/client.go @@ -41,6 +41,7 @@ import ( "errors" "fmt" "io" + "math" "os" "strings" "syscall" @@ -559,15 +560,18 @@ func (cl *Client) GetFileBlockOffsets(name string) (*common.BlockOffsetList, err return &blockList, err } - partSize := cl.Config.partSize + cutoff := cl.Config.uploadCutoff var objectSize int64 - // if file is smaller than block size then it is a small file - if result.Size < partSize { + // if file is smaller than the uploadCutoff it is small, otherwise it is a multipart + // upload + if result.Size < cutoff { blockList.Flags.Set(common.SmallFile) return &blockList, nil } + partSize := cl.Config.partSize + // Create a list of blocks that are the partSize except for the last block for objectSize <= result.Size { if objectSize+partSize >= result.Size { @@ -646,42 +650,125 @@ func (cl *Client) Write(options internal.WriteFileOptions) error { // tracks the case where our offset is great than our current file size (appending only - not modifying pre-existing data) var dataBuffer *[]byte - // get the existing object data - isSymlink := options.Metadata[symlinkKey] == "true" - oldData, _ := cl.ReadBuffer(name, 0, 0, isSymlink) - // update the data with the new data - // if we're only overwriting existing data - if int64(len(oldData)) >= offset+length { - copy(oldData[offset:], data) - dataBuffer = &oldData - // else appending and/or overwriting + fileOffsets, err := cl.GetFileBlockOffsets(name) + if err != nil { + return err + } + + if fileOffsets.SmallFile() { + // case 1: file consists of no parts (small file) + + // get the existing object data + isSymlink := options.Metadata[symlinkKey] == "true" + oldData, _ := cl.ReadBuffer(name, 0, 0, isSymlink) + // update the data with the new data + // if we're only overwriting existing data + if int64(len(oldData)) >= offset+length { + copy(oldData[offset:], data) + dataBuffer = &oldData + // else appending and/or overwriting + } else { + // if the file is not empty then we need to combine the data + if len(oldData) > 0 { + // new data buffer with the size of old and new data + newDataBuffer := make([]byte, offset+length) + // copy the old data into it + // TODO: better way to do this? + if offset != 0 { + copy(newDataBuffer, oldData) + oldData = nil + } + // overwrite with the new data we want to add + copy(newDataBuffer[offset:], data) + dataBuffer = &newDataBuffer + } else { + dataBuffer = &data + } + } + + // WriteFromBuffer should be able to handle the case where now the block is too big and gets split into multiple parts + err := cl.WriteFromBuffer(name, options.Metadata, *dataBuffer) + if err != nil { + log.Err("Client::Write : Failed to upload to object. 
Here's why: %v ", name, err) + return err + } } else { - // if the file is not empty then we need to combine the data - if len(oldData) > 0 { - // new data buffer with the size of old and new data - newDataBuffer := make([]byte, offset+length) - // copy the old data into it - // TODO: better way to do this? - if offset != 0 { - copy(newDataBuffer, oldData) - oldData = nil + // case 2: given offset is within the size of the object - and the object consists of multiple parts + // case 3: new parts need to be added + + index, oldDataSize, exceedsFileBlocks, appendOnly := fileOffsets.FindBlocksToModify(offset, length) + // keeps track of how much new data will be appended to the end of the file (applicable only to case 3) + newBufferSize := int64(0) + // case 3? + if exceedsFileBlocks { + newBufferSize = cl.createNewBlocks(fileOffsets, offset, length) + } + // buffer that holds that pre-existing data in those blocks we're interested in + oldDataBuffer := make([]byte, oldDataSize+newBufferSize) + if !appendOnly { + // fetch the parts that will be impacted by the new changes so we can overwrite them + err = cl.ReadInBuffer(name, fileOffsets.BlockList[index].StartIndex, oldDataSize, oldDataBuffer) + if err != nil { + log.Err("BlockBlob::Write : Failed to read data in buffer %s [%s]", name, err.Error()) } - // overwrite with the new data we want to add - copy(newDataBuffer[offset:], data) - dataBuffer = &newDataBuffer - } else { - dataBuffer = &data } - } - // WriteFromBuffer should be able to handle the case where now the block is too big and gets split into multiple blocks - err := cl.WriteFromBuffer(name, options.Metadata, *dataBuffer) - if err != nil { - log.Err("Client::Write : Failed to upload to object. Here's why: %v ", name, err) + // this gives us where the offset with respect to the buffer that holds our old data - so we can start writing the new data + blockOffset := offset - fileOffsets.BlockList[index].StartIndex + copy(oldDataBuffer[blockOffset:], data) + err := cl.stageAndCommitModifiedBlocks(name, oldDataBuffer, fileOffsets) return err } + return nil } +func (cl *Client) createBlock(blockIdLength, startIndex, size int64) *common.Block { + newBlockId := base64.StdEncoding.EncodeToString(common.NewUUIDWithLength(blockIdLength)) + newBlock := &common.Block{ + Id: newBlockId, + StartIndex: startIndex, + EndIndex: startIndex + size, + } + // mark truncated since it is a new empty block + newBlock.Flags.Set(common.TruncatedBlock) + newBlock.Flags.Set(common.DirtyBlock) + return newBlock +} + +func (cl *Client) createNewBlocks(blockList *common.BlockOffsetList, offset, length int64) int64 { + partSize := cl.Config.partSize + prevIndex := blockList.BlockList[len(blockList.BlockList)-1].EndIndex + if partSize == 0 { + partSize = DefaultPartSize + } + // BufferSize is the size of the buffer that will go beyond our current object + var bufferSize int64 + for i := prevIndex; i < offset+length; i += partSize { + blkSize := int64(math.Min(float64(partSize), float64((offset+length)-i))) + newBlock := cl.createBlock(blockList.BlockIdLength, i, blkSize) + blockList.BlockList = append(blockList.BlockList, newBlock) + // reset the counter to determine if there are leftovers at the end + bufferSize += blkSize + } + return bufferSize +} + +func (cl *Client) stageAndCommitModifiedBlocks(name string, data []byte, offsetList *common.BlockOffsetList) error { + blockOffset := int64(0) + for _, blk := range offsetList.BlockList { + if blk.Dirty() { + blk.Data = data[blockOffset : 
(blk.EndIndex-blk.StartIndex)+blockOffset] + blockOffset = (blk.EndIndex - blk.StartIndex) + blockOffset + // Clear the truncated flag if we are writing data to this block + if blk.Truncated() { + blk.Flags.Clear(common.TruncatedBlock) + } + } + } + + return cl.StageAndCommit(name, offsetList) +} + func (cl *Client) StageAndCommit(name string, bol *common.BlockOffsetList) error { // lock on the object name so that no stage and commit race condition occur causing failure objectMtx := cl.blockLocks.GetLock(name) diff --git a/component/s3storage/client_test.go b/component/s3storage/client_test.go index 3fd47faeb..69634c2c9 100644 --- a/component/s3storage/client_test.go +++ b/component/s3storage/client_test.go @@ -93,6 +93,8 @@ func newTestClient(configuration string) (*Client, error) { }, prefixPath: conf.PrefixPath, disableConcurrentDownload: conf.DisableConcurrentDownload, + partSize: conf.PartSizeMb * common.MbToBytes, + uploadCutoff: conf.UploadCutoffMb * common.MbToBytes, } // create a Client client := NewConnection(configForS3Client) @@ -133,10 +135,17 @@ func (s *clientTestSuite) SetupTest() { } func (s *clientTestSuite) setupTestHelper(configuration string, create bool) { + if storageTestConfigurationParameters.PartSizeMb == 0 { + storageTestConfigurationParameters.PartSizeMb = 5 + } + if storageTestConfigurationParameters.UploadCutoffMb == 0 { + storageTestConfigurationParameters.UploadCutoffMb = 5 + } if configuration == "" { - configuration = fmt.Sprintf("s3storage:\n bucket-name: %s\n key-id: %s\n secret-key: %s\n endpoint: %s\n region: %s", + configuration = fmt.Sprintf("s3storage:\n bucket-name: %s\n key-id: %s\n secret-key: %s\n endpoint: %s\n region: %s\n part-size-mb: %d\n upload-cutoff-mb: %d\n", storageTestConfigurationParameters.BucketName, storageTestConfigurationParameters.KeyID, - storageTestConfigurationParameters.SecretKey, storageTestConfigurationParameters.Endpoint, storageTestConfigurationParameters.Region) + storageTestConfigurationParameters.SecretKey, storageTestConfigurationParameters.Endpoint, storageTestConfigurationParameters.Region, + storageTestConfigurationParameters.PartSizeMb, storageTestConfigurationParameters.UploadCutoffMb) } s.config = configuration diff --git a/component/s3storage/s3storage_test.go b/component/s3storage/s3storage_test.go index 4f7f3c1ad..c69955164 100644 --- a/component/s3storage/s3storage_test.go +++ b/component/s3storage/s3storage_test.go @@ -1824,6 +1824,7 @@ func (s *s3StorageTestSuite) TestOverwriteBlocks() { defer s.cleanupTest() blockSizeMB := 5 storageTestConfigurationParameters.PartSizeMb = int64(blockSizeMB) + storageTestConfigurationParameters.UploadCutoffMb = 5 vdConfig := generateConfigYaml(storageTestConfigurationParameters) s.setupTestHelper(vdConfig, s.bucket, true) @@ -1865,6 +1866,7 @@ func (s *s3StorageTestSuite) TestOverwriteAndAppendBlocks() { defer s.cleanupTest() blockSizeMB := 5 storageTestConfigurationParameters.PartSizeMb = int64(blockSizeMB) + storageTestConfigurationParameters.UploadCutoffMb = 5 vdConfig := generateConfigYaml(storageTestConfigurationParameters) s.setupTestHelper(vdConfig, s.bucket, true) @@ -1900,9 +1902,131 @@ func (s *s3StorageTestSuite) TestOverwriteAndAppendBlocks() { f.Close() } +func (s *s3StorageTestSuite) TestAppendBlocks() { + defer s.cleanupTest() + blockSizeMB := 5 + storageTestConfigurationParameters.PartSizeMb = int64(blockSizeMB) + storageTestConfigurationParameters.UploadCutoffMb = 5 + vdConfig := generateConfigYaml(storageTestConfigurationParameters) + 
s.setupTestHelper(vdConfig, s.bucket, true) + + // Setup + name := generateFileName() + h, err := s.s3Storage.CreateFile(internal.CreateFileOptions{Name: name}) + s.assert.Nil(err) + data := make([]byte, 5*MB) + rand.Read(data) + + key := common.JoinUnixFilepath(s.s3Storage.stConfig.prefixPath, name) + err = s.uploadReaderAtToObject(ctx, bytes.NewReader(data), int64(len(data)), key, int64(blockSizeMB)) + s.assert.Nil(err) + f, _ := os.CreateTemp("", name+".tmp") + defer os.Remove(f.Name()) + newTestData := []byte("43211234cake") + _, err = s.s3Storage.WriteFile(internal.WriteFileOptions{Handle: h, Offset: 5 * MB, Data: newTestData}) + s.assert.Nil(err) + + currentData := append(data, []byte("43211234cake")...) + dataLen := len(currentData) + output := make([]byte, dataLen) + + err = s.s3Storage.CopyToFile(internal.CopyToFileOptions{Name: name, File: f}) + s.assert.Nil(err) + + f, err = os.Open(f.Name()) + s.assert.Nil(err) + len, err := f.Read(output) + s.assert.Nil(err) + s.assert.EqualValues(dataLen, len) + s.assert.EqualValues(currentData, output) + f.Close() +} + +func (s *s3StorageTestSuite) TestOverwriteAndAppendBlocksLargeFile() { + defer s.cleanupTest() + blockSizeMB := 5 + storageTestConfigurationParameters.PartSizeMb = int64(blockSizeMB) + storageTestConfigurationParameters.UploadCutoffMb = 5 + vdConfig := generateConfigYaml(storageTestConfigurationParameters) + s.setupTestHelper(vdConfig, s.bucket, true) + + // Setup + name := generateFileName() + h, err := s.s3Storage.CreateFile(internal.CreateFileOptions{Name: name}) + s.assert.Nil(err) + data := make([]byte, 15*MB) + rand.Read(data) + + key := common.JoinUnixFilepath(s.s3Storage.stConfig.prefixPath, name) + err = s.uploadReaderAtToObject(ctx, bytes.NewReader(data), int64(len(data)), key, int64(blockSizeMB)) + s.assert.Nil(err) + f, _ := os.CreateTemp("", name+".tmp") + defer os.Remove(f.Name()) + newTestData := []byte("43211234cake") + _, err = s.s3Storage.WriteFile(internal.WriteFileOptions{Handle: h, Offset: 15*MB - 4, Data: newTestData}) + s.assert.Nil(err) + + currentData := append(data[:len(data)-4], []byte("43211234cake")...) + dataLen := len(currentData) + output := make([]byte, dataLen) + + err = s.s3Storage.CopyToFile(internal.CopyToFileOptions{Name: name, File: f}) + s.assert.Nil(err) + + f, err = os.Open(f.Name()) + s.assert.Nil(err) + len, err := f.Read(output) + s.assert.Nil(err) + s.assert.EqualValues(dataLen, len) + s.assert.EqualValues(currentData, output) + f.Close() +} + +func (s *s3StorageTestSuite) TestOverwriteAndAppendBlocksMiddleLargeFile() { + defer s.cleanupTest() + blockSizeMB := 5 + storageTestConfigurationParameters.PartSizeMb = int64(blockSizeMB) + storageTestConfigurationParameters.UploadCutoffMb = 5 + vdConfig := generateConfigYaml(storageTestConfigurationParameters) + s.setupTestHelper(vdConfig, s.bucket, true) + + // Setup + name := generateFileName() + h, err := s.s3Storage.CreateFile(internal.CreateFileOptions{Name: name}) + s.assert.Nil(err) + data := make([]byte, 15*MB) + rand.Read(data) + + key := common.JoinUnixFilepath(s.s3Storage.stConfig.prefixPath, name) + err = s.uploadReaderAtToObject(ctx, bytes.NewReader(data), int64(len(data)), key, int64(blockSizeMB)) + s.assert.Nil(err) + f, _ := os.CreateTemp("", name+".tmp") + defer os.Remove(f.Name()) + newTestData := []byte("43211234cake") + _, err = s.s3Storage.WriteFile(internal.WriteFileOptions{Handle: h, Offset: 5*MB - 4, Data: newTestData}) + s.assert.Nil(err) + + currentData := append(data[:5*MB-4], []byte("43211234cake")...) 
+ currentData = append(currentData, data[5*MB+8:]...) + dataLen := len(currentData) + output := make([]byte, dataLen) + + err = s.s3Storage.CopyToFile(internal.CopyToFileOptions{Name: name, File: f}) + s.assert.Nil(err) + + f, err = os.Open(f.Name()) + s.assert.Nil(err) + len, err := f.Read(output) + s.assert.Nil(err) + s.assert.EqualValues(dataLen, len) + s.assert.EqualValues(currentData, output) + f.Close() +} + func (s *s3StorageTestSuite) TestAppendOffsetLargerThanSize() { defer s.cleanupTest() // Setup + storageTestConfigurationParameters.UploadCutoffMb = 5 name := generateFileName() h, err := s.s3Storage.CreateFile(internal.CreateFileOptions{Name: name}) s.assert.Nil(err) From 5ab3f5d9250f812df85999e52abc9cc2da2aa33b Mon Sep 17 00:00:00 2001 From: James Fantin-Hardesty Date: Tue, 12 Sep 2023 16:03:08 +0000 Subject: [PATCH 2/3] Merged PR 72: Update blobfuse2.1.0 This pulls in the latest changes from the blobfuse 2.1.0 release. This adds a new block_cache component that I have changed to only work on Linux, since it is very specific to Linux. See https://github.com/Azure/azure-storage-fuse/releases/tag/blobfuse2-2.1.0 for the other release notes of the release. --- CHANGELOG.md | 21 +- NOTICE | 33 + README.md | 17 + azure-pipeline-templates/build-release.yml | 2 +- azure-pipeline-templates/build.yml | 12 +- .../cloudfuse-ci-template.yml | 150 ++++ .../e2e-tests-block-cache.yml | 118 +++ azure-pipeline-templates/mount.yml | 4 + azure-pipeline-templates/setup.yml | 12 +- azure-pipeline-templates/verbose-tests.yml | 8 +- cloudfuse-ci.yaml | 183 +--- cloudfuse-code-coverage.yaml | 2 +- cloudfuse-nightly.yaml | 540 +++++++++--- cloudfuse-release.yaml | 573 ++++++++++-- cmd/health-monitor_test.go | 26 +- cmd/{imports.go => import_windows.go} | 2 + .../imports_linux.go | 55 +- cmd/mount.go | 23 +- cmd/mount_linux_test.go | 16 + common/types.go | 2 +- common/util.go | 3 + common/util_linux.go | 96 ++ common/util_test.go | 43 + common/util_windows.go | 65 ++ component/azstorage/azauth_test.go | 27 + component/azstorage/azauthmsi.go | 214 ++++- component/azstorage/azstorage.go | 3 +- component/azstorage/block_blob.go | 4 +- component/block_cache/block_cache_linux.go | 819 ++++++++++++++++++ .../block_cache/block_cache_linux_test.go | 498 +++++++++++ component/block_cache/block_linux.go | 113 +++ component/block_cache/block_linux_test.go | 188 ++++ component/block_cache/blockpool_linux.go | 152 ++++ component/block_cache/blockpool_linux_test.go | 166 ++++ component/block_cache/threadpool_linux.go | 148 ++++ .../block_cache/threadpool_linux_test.go | 141 +++ component/file_cache/cache_policy.go | 20 +- component/file_cache/cache_policy_linux.go | 114 --- component/file_cache/cache_policy_test.go | 19 +- component/file_cache/file_cache.go | 63 +- component/file_cache/file_cache_linux.go | 2 +- component/file_cache/file_cache_test.go | 128 ++- component/file_cache/file_cache_windows.go | 2 +- component/file_cache/lru_policy.go | 3 +- component/libfuse/libfuse.go | 10 +- component/libfuse/libfuse2_handler.go | 7 +- component/loopback/loopback_fs.go | 11 + go.mod | 36 +- go.sum | 71 +- go_installer.sh | 19 +- internal/handlemap/handle_map.go | 22 +- setup/baseConfig.yaml | 12 + test/benchmark_test/fio.cfg | 13 + test/longhaul/longhaul.sh | 6 + test/scripts/fio.sh | 0 test/scripts/fio/random_read_20T_100G.fio | 9 + test/scripts/fio/random_read_4T_100G.fio | 9 + test/scripts/fio/random_read_4T_4G.fio | 9 + .../scripts/fio/sequential_read_128T_100G.fio | 9 + test/scripts/fio/sequential_read_1T_4G.fio | 9 
+ test/scripts/fio/sequential_read_20T_100G.fio | 9 + test/scripts/fio/sequential_read_4T_100G.fio | 9 + test/scripts/fio/sequential_read_4T_4G.fio | 9 + test/scripts/latency_hiding.jl | 43 + test/scripts/parallel_batch_read.py | 83 ++ test/scripts/profile-latency-hiding.sh | 32 + test/scripts/readwrite.c | 0 test/test_utils/dir_list_seek.c | 66 ++ testdata/config/azure_key_bc.yaml | 41 + tools/health-monitor/internal/stats_export.go | 2 +- 70 files changed, 4700 insertions(+), 676 deletions(-) create mode 100644 azure-pipeline-templates/cloudfuse-ci-template.yml create mode 100644 azure-pipeline-templates/e2e-tests-block-cache.yml rename cmd/{imports.go => import_windows.go} (99%) rename component/file_cache/cache_policy_windows.go => cmd/imports_linux.go (58%) create mode 100644 component/block_cache/block_cache_linux.go create mode 100644 component/block_cache/block_cache_linux_test.go create mode 100644 component/block_cache/block_linux.go create mode 100644 component/block_cache/block_linux_test.go create mode 100644 component/block_cache/blockpool_linux.go create mode 100644 component/block_cache/blockpool_linux_test.go create mode 100644 component/block_cache/threadpool_linux.go create mode 100644 component/block_cache/threadpool_linux_test.go delete mode 100644 component/file_cache/cache_policy_linux.go create mode 100644 test/benchmark_test/fio.cfg mode change 100644 => 100755 test/scripts/fio.sh create mode 100755 test/scripts/fio/random_read_20T_100G.fio create mode 100755 test/scripts/fio/random_read_4T_100G.fio create mode 100755 test/scripts/fio/random_read_4T_4G.fio create mode 100755 test/scripts/fio/sequential_read_128T_100G.fio create mode 100755 test/scripts/fio/sequential_read_1T_4G.fio create mode 100755 test/scripts/fio/sequential_read_20T_100G.fio create mode 100755 test/scripts/fio/sequential_read_4T_100G.fio create mode 100755 test/scripts/fio/sequential_read_4T_4G.fio create mode 100755 test/scripts/latency_hiding.jl create mode 100755 test/scripts/parallel_batch_read.py create mode 100755 test/scripts/profile-latency-hiding.sh mode change 100644 => 100755 test/scripts/readwrite.c create mode 100644 test/test_utils/dir_list_seek.c create mode 100644 testdata/config/azure_key_bc.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d5e9e175..6353e8da2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,22 @@ -## 2.0.5 (WIP) +## 2.1.0 (WIP) +**Features** +- Added support for ARM64 architecture. +- Block cache component added to support faster serial reads of large files with prefetching of blocks + - As of now only one file single threaded read is faster + - Only read-only mounts will support block-cache +- Adaptive prefetching to support random reads without incurring extra network cost +- Block cache with disk backup to reduce network cost if same blocks are read again +- On AML compute cluster MSI authentication is now supported (this will use the identity assigned to compute cluster) + +**Bug Fixes** +- Fix to evict the destination file from local cache post rename file operation. +- If `$PATH` is not populated correctly, find out correct path for `du` command. +- Disable `kernel_cache` and `writeback_cache` when `direct_io` is set. +- Fix FUSE CLI parameter parsing, where CLI overrides parameters provided in config file. +- [#1226](https://github.com/Azure/azure-storage-fuse/issues/1226) If max disk-cache size is not configured, check the available disk space to kick-in early eviction. 
+- [#1230](https://github.com/Azure/azure-storage-fuse/issues/1230) Truncate file locally and then upload instead of downloading it again. + +## 2.0.5 (2023-08-02) **Features** - In case of MSI based authentication, user shall provide object-id of the identity and honour-acl flag for file-system to work with ACLs assigned to the given identity instead of permissions. - Added support to read OAuth token from a user given file. @@ -19,6 +37,7 @@ - Added new config parameter 'refresh-sec' in 'file-cache'. When file-cache-timeout is set to a large value, this field can control when to refresh the file if file in container has changed. - Added FUSE option `direct_io` to bypass the kernel cache and perform direct I/O operations. + **Bug Fixes** - [#1116](https://github.com/Azure/azure-storage-fuse/issues/1116) Relative path for tmp-cache is resulting into file read-write failure. - [#1151](https://github.com/Azure/azure-storage-fuse/issues/1151) Reason for unmount failure is not displayed in the console output. diff --git a/NOTICE b/NOTICE index 5356a2f5d..f9b5b6506 100644 --- a/NOTICE +++ b/NOTICE @@ -18545,4 +18545,37 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. limitations under the License. + + + + +**************************************************************************** + +============================================================================ +>>> github.com/vibhansa-msft/tlru +============================================================================== + +MIT License + +Copyright (c) 2023 Vikas Bhansali + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + --------------------- END OF THIRD PARTY NOTICE -------------------------------- diff --git a/README.md b/README.md index ce5adfb3e..8397bde6d 100755 --- a/README.md +++ b/README.md @@ -12,6 +12,15 @@ writes, however, it does not guarantee continuous sync of data written to storag Cloudfuse. For data integrity it is recommended that multiple sources do not modify the same blob/object/file. Please submit an issue [here]() for any issues/feature requests/questions. +## NOTICE + +- We have seen some customer issues around files getting corrupted when `streaming` is used in write mode. Kindly avoid using this feature for write while we investigate and resolve it. + + + ## Features - Mount an S3 bucket or Azure storage container or datalake file system on Linux and Windows. 
- Basic file system operations such as mkdir, opendir, readdir, rmdir, open, read, create, write, close, unlink, @@ -141,6 +150,13 @@ include the name of the command (For example: `cloudfuse mount -h`). otherwise it just evicts file from local cache. - Stream options * `--block-size-mb=`: Size of a block to be downloaded during streaming. +- Block-Cache options + * `--block-cache-block-size=`: Size of a block to be downloaded as a unit. + * `--block-cache-pool-size=`: Size of pool to be used for caching. This limits total memory used by block-cache. + * `--block-cache-path=`: Path where downloaded blocks will be persisted. Not providing this parameter will disable the disk caching. + * `--block-cache-disk-size=`: Disk space to be used for caching. + * `--block-cache-prefetch=`: Number of blocks to prefetch at max when sequential reads are in progress. + * `--block-cache-prefetch-on-open=true`: Start prefetching on open system call instead of waiting for first read. Enhances perf if file is read sequentially from offset 0. - Fuse options * `--attr-timeout=`: Time the kernel can cache inode attributes. * `--entry-timeout=`: Time the kernel can cache directory listing. @@ -218,6 +234,7 @@ cost and performance implications. - chown : Change of ownership is not supported by Azure Storage hence Cloudfuse does not support this. - Creation of device files or pipes is not supported by Cloudfuse. - Cloudfuse does not support extended-attributes (x-attrs) operations +- Cloudfuse does not support lseek() operation on directory handles. No error is thrown but it will not work as expected. ## Un-Supported Scenarios - Cloudfuse does not support overlapping mount paths. While running multiple instances of Cloudfuse make sure each diff --git a/azure-pipeline-templates/build-release.yml b/azure-pipeline-templates/build-release.yml index 4121d5ce1..abbad047b 100644 --- a/azure-pipeline-templates/build-release.yml +++ b/azure-pipeline-templates/build-release.yml @@ -17,7 +17,7 @@ steps: - task: ShellScript@2 inputs: scriptPath: "${{ parameters.work_dir }}/go_installer.sh" - args: "${{ parameters.root_dir }}/ 1.20.5" + args: "${{ parameters.root_dir }}/" displayName: "Installing Go tools" # Installing Cloudfuse Dependencies via go get diff --git a/azure-pipeline-templates/build.yml b/azure-pipeline-templates/build.yml index 8012863a7..8b979729c 100755 --- a/azure-pipeline-templates/build.yml +++ b/azure-pipeline-templates/build.yml @@ -38,19 +38,11 @@ steps: displayName: 'Libfuse Setup' condition: eq('${{ parameters.hostedAgent }}', true) - # GoTool task used only for Microsoft Hosted Agents to install Go-lang - - task: GoTool@0 - inputs: - version: '1.20.5' - condition: ${{ parameters.hostedAgent }} - displayName: "GoTool Setup" - - # Alternative custom script for Self-Hosted agents to install Go-lang + # Custom script to install Go-lang - task: ShellScript@2 inputs: scriptPath: "${{ parameters.working_directory }}/go_installer.sh" - args: "${{ parameters.root_dir }}/ 1.20.5" - condition: not(${{parameters.hostedAgent }}) + args: "${{ parameters.root_dir }}/" displayName: "GoTool Custom Setup" # Downloading Go dependency packages diff --git a/azure-pipeline-templates/cloudfuse-ci-template.yml b/azure-pipeline-templates/cloudfuse-ci-template.yml new file mode 100644 index 000000000..d879aadb3 --- /dev/null +++ b/azure-pipeline-templates/cloudfuse-ci-template.yml @@ -0,0 +1,150 @@ +parameters: + - name: fuselib + type: string + - name: tags + type: string + - name: container + type: string + - name: skip_msi + 
type: string + default: "true" + - name: proxy_address + type: string + default: "" + - name: skip_fmt + type: boolean + default: true + +steps: + - checkout: self + + - task: GoTool@0 + inputs: + version: '1.20.5' + displayName: "Select Go Version" + + # Install fuse and build the code + - script: | + sudo apt-get update --fix-missing + sudo apt-get install ${{ parameters.fuselib }} gcc -y + displayName: 'Install libfuse' + + - task: Go@0 + inputs: + command: 'build' + workingDirectory: ./ + arguments: "-tags ${{ parameters.tags }} -o cloudfuse" + displayName: "Build" + + - script: | + cnfFile=$HOME/azuretest.json + echo $cnfFile + touch $cnfFile + echo "{" > $cnfFile + echo "\"block-acct\"": "\"$(AZTEST_BLOCK_ACC_NAME)\"", >> $cnfFile + echo "\"adls-acct\"": "\"$(AZTEST_ADLS_ACC_NAME)\"", >> $cnfFile + echo "\"block-cont\"": "\"${{ parameters.container }}\"", >> $cnfFile + echo "\"adls-cont\"": "\"${{ parameters.container }}\"", >> $cnfFile + echo "\"block-key\"": "\"$(AZTEST_BLOCK_KEY)\"", >> $cnfFile + echo "\"adls-key\"": "\"$(AZTEST_ADLS_KEY)\"", >> $cnfFile + echo "\"block-sas\"": "\"$(AZTEST_BLOCK_SAS)\"", >> $cnfFile + echo "\"block-cont-sas-ubn-18\"": "\"$(AZTEST_BLOCK_CONT_SAS_UBN_18)\"", >> $cnfFile + echo "\"block-cont-sas-ubn-20\"": "\"$(AZTEST_BLOCK_CONT_SAS_UBN_20)\"", >> $cnfFile + echo "\"adls-sas\"": "\"$(adlsSas)\"", >> $cnfFile + echo "\"msi-appid\"": "\"$(AZTEST_APP_ID)\"", >> $cnfFile + echo "\"msi-resid\"": "\"$(AZTEST_RES_ID)\"", >> $cnfFile + echo "\"msi-objid\"": "\"$(AZTEST_OBJ_ID)\"", >> $cnfFile + echo "\"spn-client\"": "\"$(AZTEST_CLIENT)\"", >> $cnfFile + echo "\"spn-tenant\"": "\"$(AZTEST_TENANT)\"", >> $cnfFile + echo "\"spn-secret\"": "\"$(AZTEST_SECRET)\"", >> $cnfFile + echo "\"skip-msi\"": "${{ parameters.skip_msi }}", >> $cnfFile + echo "\"proxy-address\"": "\"${{ parameters.proxy_address }}\"" >> $cnfFile + echo "}" >> $cnfFile + cat $cnfFile + displayName: "Create Configuration File" + continueOnError: false + workingDirectory: ./ + + - script: | + cnfFile=$HOME/s3test.json + echo $cnfFile + touch $cnfFile + echo "{" > $cnfFile + echo "\"bucket-name\"": "\"$(S3TEST_BUCKET_NAME)\"", >> $cnfFile + echo "\"access-key\"": "\"$(S3TEST_ACCESS_KEY)\"", >> $cnfFile + echo "\"secret-key\"": "\"$(S3TEST_SECRET_KEY)\"", >> $cnfFile + echo "\"endpoint\"": "\"$(S3TEST_ENDPOINT)\"", >> $cnfFile + echo "\"region\"": "\"$(S3TEST_REGION)\"" >> $cnfFile + echo "}" >> $cnfFile + cat $cnfFile + displayName: "Create S3 Configuration File" + continueOnError: false + workingDirectory: ./ + + # Code lint checks (Static-analysis) + # Exit code changed to prevent failing in CI/CD pipeline + # TODO: Remove this once we are passing file coverage checks consistently + - script: | + curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin + $(go env GOPATH)/bin/golangci-lint --version + $(go env GOPATH)/bin/golangci-lint run --tests=false --build-tags ${{ parameters.tags }} --skip-dirs test,common/stats_collector,common/stats_monitor --max-issues-per-linter=0 --skip-files component/libfuse/libfuse2_handler_test_wrapper.go,component/libfuse/libfuse_handler_test_wrapper.go > lint.log + result=$(cat lint.log | wc -l) + if [ $result -ne 0 ]; then + echo "-----------------------------------" + echo "Below issues are found in SA" + cat lint.log + echo "-----------------------------------" + exit 0 + else + echo "-----------------------------------" + echo "No issues are found in SA" + echo 
"-----------------------------------" + fi + displayName: 'Static Analysis (Lint)' + condition: always() + workingDirectory: ./ + + # Copyright checks + # - script: | + # result=$(grep -L -r --include \*.go "`date +%Y` Microsoft Corporation" ./ | wc -l) + # if [ $result -ne 0 ]; then + # exit 1 + # else + # echo "Copyright statements are up to date" + # fi + # displayName: 'Copyright check' + # condition: always() + # failOnStderr: true + # workingDirectory: ./ + + # Go code formatting checks + - script: | + gofmt -s -l -d . | tee >&2 + displayName: 'Go Format Check' + condition: eq(${{ parameters.skip_fmt }}, true) + failOnStderr: true + workingDirectory: ./ + + # Notices files check + - script: | + ./notices_fix.sh + result=$(git diff NOTICE | wc -l) + if [ $result -ne 0 ]; then + echo "Notices needs a fix. Run ./notices_fix.sh and commit NOTICE file." + exit 1 + else + echo "Notices are up to date." + fi + displayName: 'Notice file check' + condition: always() + failOnStderr: true + workingDirectory: ./ + + # Running unit tests for fuse3 on ubn-20 + - task: Go@0 + inputs: + command: 'test' + arguments: '-v -timeout=2h ./... --tags=unittest,${{ parameters.tags }} -coverprofile utcover.cov' + workingDirectory: ./ + displayName: 'Unit tests' + continueOnError: false diff --git a/azure-pipeline-templates/e2e-tests-block-cache.yml b/azure-pipeline-templates/e2e-tests-block-cache.yml new file mode 100644 index 000000000..465741fe0 --- /dev/null +++ b/azure-pipeline-templates/e2e-tests-block-cache.yml @@ -0,0 +1,118 @@ +parameters: + - name: conf_template + type: string + - name: config_file + type: string + - name: container + type: string + - name: temp_dir + type: string + - name: mount_dir + type: string + - name: idstring + type: string + - name: adls + type: boolean + - name: account_name + type: string + - name: account_key + type: string + - name: account_type + type: string + - name: account_endpoint + - name: distro_name + type: string + - name: quick_test + type: boolean + default: true + - name: verbose_log + type: boolean + default: false + - name: clone + type: boolean + default: false + - name: stream_direct_test + type: boolean + default: false + +steps: + - script: | + $(WORK_DIR)/blobfuse2 gen-test-config --config-file=$(WORK_DIR)/testdata/config/azure_key.yaml --container-name=${{ parameters.container }} --temp-path=${{ parameters.temp_dir }} --output-file=${{ parameters.config_file }} + displayName: 'Create Config File for RW mount' + env: + NIGHTLY_STO_ACC_NAME: ${{ parameters.account_name }} + NIGHTLY_STO_ACC_KEY: ${{ parameters.account_key }} + ACCOUNT_TYPE: ${{ parameters.account_type }} + ACCOUNT_ENDPOINT: ${{ parameters.account_endpoint }} + VERBOSE_LOG: ${{ parameters.verbose_log }} + continueOnError: false + + - script: + cat ${{ parameters.config_file }} + displayName: 'Print config file' + + - template: 'mount.yml' + parameters: + working_dir: $(WORK_DIR) + mount_dir: ${{ parameters.mount_dir }} + temp_dir: ${{ parameters.temp_dir }} + prefix: ${{ parameters.idstring }} + mountStep: + script: | + $(WORK_DIR)/blobfuse2 mount ${{ parameters.mount_dir }} --config-file=${{ parameters.config_file }} --default-working-dir=$(WORK_DIR) --file-cache-timeout=3200 + + - script: | + for i in {1,2,3,4,5,6,7,8,9,10,20,30,50,100,200,1024,2048,4096}; do echo $i; done | parallel --will-cite -j 5 'head -c {}M < /dev/urandom > ${{ parameters.mount_dir }}/myfile_{}' + ls -l ${{ parameters.mount_dir }}/myfile_* + displayName: 'Generate data' + + - script: | + md5sum ${{ 
parameters.mount_dir }}/myfile_* > $(WORK_DIR)/md5sum_file_cache.txt + displayName: 'Generate md5Sum with File-Cache' + + - script: | + $(WORK_DIR)/blobfuse2 unmount all + displayName: 'Unmount RW mount' + + - script: | + $(WORK_DIR)/blobfuse2 gen-test-config --config-file=$(WORK_DIR)/testdata/config/azure_key_bc.yaml --container-name=${{ parameters.container }} --temp-path=${{ parameters.temp_dir }} --output-file=${{ parameters.config_file }} + displayName: 'Create Config File for RO mount' + env: + NIGHTLY_STO_ACC_NAME: ${{ parameters.account_name }} + NIGHTLY_STO_ACC_KEY: ${{ parameters.account_key }} + ACCOUNT_TYPE: ${{ parameters.account_type }} + ACCOUNT_ENDPOINT: ${{ parameters.account_endpoint }} + VERBOSE_LOG: ${{ parameters.verbose_log }} + continueOnError: false + + - template: 'mount.yml' + parameters: + working_dir: $(WORK_DIR) + mount_dir: ${{ parameters.mount_dir }} + temp_dir: ${{ parameters.temp_dir }} + prefix: ${{ parameters.idstring }} + ro_mount: true + mountStep: + script: | + $(WORK_DIR)/blobfuse2 mount ${{ parameters.mount_dir }} --config-file=${{ parameters.config_file }} --default-working-dir=$(WORK_DIR) -o ro + + - script: | + md5sum ${{ parameters.mount_dir }}/myfile_* > $(WORK_DIR)/md5sum_block_cache.txt + displayName: 'Generate md5Sum with Block-Cache' + + - script: | + $(WORK_DIR)/blobfuse2 unmount all + displayName: 'Unmount RO mount' + + - script: | + diff $(WORK_DIR)/md5sum_block_cache.txt $(WORK_DIR)/md5sum_file_cache.txt + if [ $? -ne 0 ]; then + exit 1 + fi + displayName: 'Compare md5Sum' + + - template: 'cleanup.yml' + parameters: + working_dir: $(WORK_DIR) + mount_dir: ${{ parameters.mount_dir }} + temp_dir: ${{ parameters.temp_dir }} \ No newline at end of file diff --git a/azure-pipeline-templates/mount.yml b/azure-pipeline-templates/mount.yml index 27510c0bf..7767a08be 100755 --- a/azure-pipeline-templates/mount.yml +++ b/azure-pipeline-templates/mount.yml @@ -10,6 +10,9 @@ parameters: - name: prefix type: string default: 'Test' + - name: ro_mount + type: boolean + default: false steps: @@ -42,5 +45,6 @@ steps: timeoutInMinutes: 30 displayName: '${{ parameters.prefix }}: PreStart Cleanup' continueOnError: true + condition: eq( ${{ parameters.ro_mount }}, false ) env: mount_dir: ${{ parameters.mount_dir }} \ No newline at end of file diff --git a/azure-pipeline-templates/setup.yml b/azure-pipeline-templates/setup.yml index 8e930a3f8..828a9e76e 100644 --- a/azure-pipeline-templates/setup.yml +++ b/azure-pipeline-templates/setup.yml @@ -14,11 +14,6 @@ steps: hostnamectl displayName: 'Print Agent Info' - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "GoTool Setup" - # Create directory structure and prepare to mount - ${{ parameters.installStep }} - script: | @@ -40,6 +35,13 @@ steps: git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` displayName: 'Checkout Branch' workingDirectory: $(WORK_DIR) + + # Custom script to install Go-lang + - task: ShellScript@2 + inputs: + scriptPath: "$(WORK_DIR)/go_installer.sh" + args: "$(ROOT_DIR)/" + displayName: "GoTool Custom Setup" # Downloading Go dependency packages - task: Go@0 diff --git a/azure-pipeline-templates/verbose-tests.yml b/azure-pipeline-templates/verbose-tests.yml index df956a8ab..0804b3cf1 100644 --- a/azure-pipeline-templates/verbose-tests.yml +++ b/azure-pipeline-templates/verbose-tests.yml @@ -90,6 +90,7 @@ steps: - script: cat ${{ parameters.config }} displayName: Print config file + condition: ${{ parameters.test_key_credential }} # Stream e2e - script: | 
@@ -107,6 +108,7 @@ steps: - script: cat ${{ parameters.stream_config }} displayName: Print Stream config file with Handle Level Caching + condition: ${{ parameters.test_stream }} # Stream e2e filename level caching - script: | @@ -124,6 +126,7 @@ steps: - script: cat ${{ parameters.stream_filename_config }} displayName: Print Stream config file with Filename Caching + condition: ${{ parameters.test_stream }} # Create sas credential config file if we need to test it - script: | @@ -141,6 +144,7 @@ steps: - script: cat ${{ parameters.sas_credential_config }} displayName: Print SAS config file + condition: ${{ parameters.test_sas_credential }} # Create spn credential config file if we need to test it - script: | @@ -160,6 +164,7 @@ steps: - script: cat ${{ parameters.spn_credential_config }} displayName: Print SPN config file + condition: ${{ parameters.test_spn_credential }} # Create azurite config file if we need to test it - script: | @@ -415,8 +420,7 @@ steps: distro_name: ${{ parameters.distro_name }} mountStep: script: > - ${{ parameters.working_dir }}/cloudfuse mount ${{ parameters.mount_dir }} --config-file=${{ parameters.config }} - --default-working-dir=${{ parameters.working_dir }} + ${{ parameters.working_dir }}/cloudfuse mount ${{ parameters.mount_dir }} --config-file=${{ parameters.config }} --default-working-dir=${{ parameters.working_dir }} displayName: 'StressTest: Mount' timeoutInMinutes: 3 continueOnError: false diff --git a/cloudfuse-ci.yaml b/cloudfuse-ci.yaml index 9ef691666..2a1769f6e 100644 --- a/cloudfuse-ci.yaml +++ b/cloudfuse-ci.yaml @@ -9,7 +9,7 @@ pr: jobs: # Ubuntu based test suite - job: test - displayName: Build and Test on + displayName: CI on strategy: matrix: Ubuntu-20: @@ -31,150 +31,37 @@ jobs: - group: NightlyCloudFuse steps: - - checkout: self - - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "Select Go Version" - - - task: Go@0 - inputs: - command: 'get' - arguments: '-d ./...' 
- workingDirectory: './' - displayName: "Get Dependencies" - - # Install fuse and build the code - - script: | - sudo apt-get update --fix-missing - sudo apt-get install $(fuselib) -y - displayName: 'Install libfuse' - - - task: Go@0 - inputs: - command: 'build' - workingDirectory: ./ - arguments: "-tags $(tags) -o cloudfuse" - displayName: "Build" - - - script: | - cnfFile=$HOME/azuretest.json - echo $cnfFile - touch $cnfFile - echo "{" > $cnfFile - echo "\"block-acct\"": "\"$(AZTEST_BLOCK_ACC_NAME)\"", >> $cnfFile - echo "\"adls-acct\"": "\"$(AZTEST_ADLS_ACC_NAME)\"", >> $cnfFile - echo "\"block-cont\"": "\"$(containerName)\"", >> $cnfFile - echo "\"adls-cont\"": "\"$(containerName)\"", >> $cnfFile - echo "\"block-key\"": "\"$(AZTEST_BLOCK_KEY)\"", >> $cnfFile - echo "\"adls-key\"": "\"$(AZTEST_ADLS_KEY)\"", >> $cnfFile - echo "\"block-sas\"": "\"$(AZTEST_BLOCK_SAS)\"", >> $cnfFile - echo "\"block-cont-sas-ubn-18\"": "\"$(AZTEST_BLOCK_CONT_SAS_UBN_18)\"", >> $cnfFile - echo "\"block-cont-sas-ubn-20\"": "\"$(AZTEST_BLOCK_CONT_SAS_UBN_20)\"", >> $cnfFile - echo "\"adls-sas\"": "\"$(adlsSas)\"", >> $cnfFile - echo "\"msi-appid\"": "\"$(AZTEST_APP_ID)\"", >> $cnfFile - echo "\"msi-resid\"": "\"$(AZTEST_RES_ID)\"", >> $cnfFile - echo "\"msi-objid\"": "\"$(AZTEST_OBJ_ID)\"", >> $cnfFile - echo "\"spn-client\"": "\"$(AZTEST_CLIENT)\"", >> $cnfFile - echo "\"spn-tenant\"": "\"$(AZTEST_TENANT)\"", >> $cnfFile - echo "\"spn-secret\"": "\"$(AZTEST_SECRET)\"", >> $cnfFile - echo "\"skip-msi\"": "true", >> $cnfFile - echo "\"proxy-address\"": "\"\"" >> $cnfFile - echo "}" >> $cnfFile - cat $cnfFile - displayName: "Create Azure Configuration File" - continueOnError: false - workingDirectory: ./ - - - script: | - cnfFile=$HOME/s3test.json - echo $cnfFile - touch $cnfFile - echo "{" > $cnfFile - echo "\"bucket-name\"": "\"$(S3TEST_BUCKET_NAME)\"", >> $cnfFile - echo "\"access-key\"": "\"$(S3TEST_ACCESS_KEY)\"", >> $cnfFile - echo "\"secret-key\"": "\"$(S3TEST_SECRET_KEY)\"", >> $cnfFile - echo "\"endpoint\"": "\"$(S3TEST_ENDPOINT)\"", >> $cnfFile - echo "\"region\"": "\"$(S3TEST_REGION)\"" >> $cnfFile - echo "}" >> $cnfFile - cat $cnfFile - displayName: "Create S3 Configuration File" - continueOnError: false - workingDirectory: ./ - - # Code lint checks (Static-analysis) - # Exit code changed to prevent failing in CI/CD pipeline - # TODO: Remove this once we are passing file coverage checks consistently - - script: | - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin - $(go env GOPATH)/bin/golangci-lint --version - $(go env GOPATH)/bin/golangci-lint run --tests=false --build-tags $(tags) --skip-dirs test,common/stats_collector,common/stats_monitor --max-issues-per-linter=0 --skip-files component/libfuse/libfuse2_handler_test_wrapper.go,component/libfuse/libfuse_handler_test_wrapper.go > lint.log - result=$(cat lint.log | wc -l) - if [ $result -ne 0 ]; then - echo "-----------------------------------" - echo "Below issues are found in SA" - cat lint.log - echo "-----------------------------------" - exit 0 - else - echo "-----------------------------------" - echo "No issues are found in SA" - echo "-----------------------------------" - fi - displayName: 'Static Analysis (Lint)' - condition: always() - workingDirectory: ./ - - # Copyright checks - # - script: | - # result=$(grep -L -r --include \*.go "`date +%Y` Microsoft Corporation" ./ | wc -l) - # if [ $result -ne 0 ]; then - # exit 1 - # else - # echo "Copyright statements are 
up to date" - # fi - # displayName: 'Copyright check' - # condition: always() - # failOnStderr: true - # workingDirectory: ./ - - # Go code formatting checks - - script: | - gofmt -s -l -d . | tee >&2 - displayName: 'Go Format Check' - condition: always() - failOnStderr: true - workingDirectory: ./ - - # Notices files check - - script: | - ./notices_fix.sh - result=$(git diff NOTICE | wc -l) - if [ $result -ne 0 ]; then - echo "Notices needs a fix. Run ./notices_fix.sh and commit NOTICE file." - exit 1 - else - echo "Notices are up to date." - fi - displayName: 'Notice file check' - condition: always() - failOnStderr: true - workingDirectory: ./ - - # Running unit tests for fuse3 on ubn-20 - - task: Go@0 - inputs: - command: 'test' - arguments: '-v -timeout=2h ./... --tags=unittest,$(tags) -coverprofile utcover.cov' - displayName: 'Unit tests' - condition: always() - - # Comment out as this is specific to an interal microsoft test tool - # - task: ComponentGovernanceComponentDetection@0 - # inputs: - # scanType: 'Register' - # verbosity: 'Verbose' - # alertWarningLevel: 'High' - # displayName: "Component governance" - # condition: always() + # ---------------------------------------------------------------- + - template: 'azure-pipeline-templates/cloudfuse-ci-template.yml' + parameters: + container: $(containerName) + tags: $(tags) + fuselib: $(fuselib) + + # - job: test_ARM64 + # displayName: CI on + # strategy: + # matrix: + # Ubuntu-22-ARM64: + # imageName: 'blobfuse-ubn22-arm64' + # containerName: 'test-cnt-ubn-22' + # fuselib: 'libfuse3-dev' + # tags: 'fuse3' + # adlsSas: $(AZTEST_ADLS_CONT_SAS_UBN_22) + + # pool: + # name: "blobfuse-ubn-arm64-pool" + # demands: + # - ImageOverride -equals $(imageName) + + # variables: + # - group: NightlyBlobFuse + + # steps: + # # ---------------------------------------------------------------- + # - template: 'azure-pipeline-templates/cloudfuse-ci-template.yml' + # parameters: + # container: $(containerName) + # tags: $(tags) + # fuselib: $(fuselib) + # skip_fmt: false diff --git a/cloudfuse-code-coverage.yaml b/cloudfuse-code-coverage.yaml index c367fc852..987ecce00 100644 --- a/cloudfuse-code-coverage.yaml +++ b/cloudfuse-code-coverage.yaml @@ -790,7 +790,7 @@ stages: - script: | echo 'mode: count' > ./cloudfuse_coverage_raw.rpt tail -q -n +2 ./*.cov >> ./cloudfuse_coverage_raw.rpt - cat ./cloudfuse_coverage_raw.rpt | grep -v mock_component | grep -v base_component | grep -v loopback | grep -v tools | grep -v "common/log" | grep -v "common/exectime" | grep -v "internal/stats_manager" | grep -v "main.go" > ./cloudfuse_coverage.rpt + cat ./cloudfuse_coverage_raw.rpt | grep -v mock_component | grep -v base_component | grep -v loopback | grep -v tools | grep -v "common/log" | grep -v "common/exectime" | grep -v "internal/stats_manager" | grep -v "main.go" | grep -v "component/azstorage/azauthmsi.go" > ./cloudfuse_coverage.rpt go tool cover -func cloudfuse_coverage.rpt > ./cloudfuse_func_cover.rpt go tool cover -html=./cloudfuse_coverage.rpt -o ./cloudfuse_coverage.html go tool cover -html=./cloudfuse_ut.cov -o ./cloudfuse_ut.html diff --git a/cloudfuse-nightly.yaml b/cloudfuse-nightly.yaml index 9d4854d22..d98edfd49 100755 --- a/cloudfuse-nightly.yaml +++ b/cloudfuse-nightly.yaml @@ -52,6 +52,11 @@ parameters: type: boolean default: true + - name: block_cache_validation + displayName: 'Block Cache Validation Test' + type: boolean + default: true + - name: verbose_log displayName: 'Verbose Log' type: boolean @@ -198,6 +203,143 @@ stages: mount_dir: 
$(MOUNT_DIR) temp_dir: $(TEMP_DIR) + - job: Set_1_ARM + timeoutInMinutes: 300 + + strategy: + matrix: + Ubuntu-22-ARM64-BlockBlob: + imageName: 'blobfuse-ubn22-arm64' + containerName: 'test-cnt-ubn-22-arm64' + adlsSas: $(AZTEST_ADLS_CONT_SAS_UBN_22_ARM) + fuselib: 'libfuse3-dev' + tags: 'fuse3' + + pool: + name: "blobfuse-ubn-arm64-pool" + demands: + - ImageOverride -equals $(imageName) + + variables: + - group: NightlyBlobFuse + - name: MOUNT_DIR + value: '$(Pipeline.Workspace)/blob_mnt' + - name: TEMP_DIR + value: '$(Pipeline.Workspace)/blobfuse2_tmp' + - name: BLOBFUSE2_CFG + value: '$(Pipeline.Workspace)/blobfuse2.yaml' + - name: BLOBFUSE2_SAS_CFG + value: '$(Pipeline.Workspace)/blobfuse2_sas_config.yaml' + - name: BLOBFUSE2_SPN_CFG + value: '$(Pipeline.Workspace)/blobfuse2_spn_config.yaml' + - name: BLOBFUSE2_STREAM_CFG + value: '$(Pipeline.Workspace)/blobfuse2_stream.yaml' + - name: BLOBFUSE2_STREAM_FILENAME_CFG + value: '$(Pipeline.Workspace)/blobfuse2_stream_filename.yaml' + - name: BLOBFUSE2_ADLS_CFG + value: '$(Pipeline.Workspace)/blobfuse2.adls.yaml' + - name: BLOBFUSE2_GTEST_CFG + value: '$(Pipeline.Workspace)/connection.yaml' + - name: BLOBFUSE2_AZURITE_CFG + value: '$(Pipeline.Workspace)/blobfuse2_azurite_config.yaml' + - name: BLOBFUSE2_STRESS_DIR + value: '$(Pipeline.Workspace)/blobfuse2_stress' + - name: DECODE_PERCENTS + value: false + - name: GOPATH + value: '$(Pipeline.Workspace)/go' + - name: ROOT_DIR + value: '$(System.DefaultWorkingDirectory)' + - name: WORK_DIR + value: '$(System.DefaultWorkingDirectory)/azure-storage-fuse' + + steps: + - checkout: none + + - script: | + hostnamectl + displayName: 'Host info' + workingDirectory: $(ROOT_DIR) + + # Clone the repo + - script: | + git clone https://github.com/Azure/azure-storage-fuse + displayName: 'Checkout Code' + workingDirectory: $(ROOT_DIR) + + # Checkout the branch + - script: | + git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` + displayName: 'Checkout Branch' + workingDirectory: $(WORK_DIR) + + - script: | + sudo apt-get update --fix-missing + sudo apt-get install $(tags) $(fuselib) gcc -y + displayName: 'Install fuse' + + # ------------------------------------------------------- + # Pull and build the code + - template: 'azure-pipeline-templates/build.yml' + parameters: + working_directory: $(WORK_DIR) + root_dir: $(Pipeline.Workspace) + mount_dir: $(MOUNT_DIR) + temp_dir: $(TEMP_DIR) + gopath: $(GOPATH) + container: $(containerName) + tags: $(tags) + fuselib: $(fuselib) + + # ------------------------------------------------------- + - template: 'azure-pipeline-templates/invalid-command-tests.yml' + parameters: + working_dir: $(WORK_DIR) + mount_dir: $(MOUNT_DIR) + + - ${{ if eq(parameters.exhaustive_test, true) }}: + - template: 'azure-pipeline-templates/verbose-tests.yml' + parameters: + service: 'BlockBlob' + account_type: 'block' + account_endpoint: 'https://$(NIGHTLY_STO_BLOB_ACC_NAME).blob.core.windows.net' + adls: false + account_name: $(NIGHTLY_STO_BLOB_ACC_NAME) + account_key: $(NIGHTLY_STO_BLOB_ACC_KEY) + account_sas: $(NIGHTLY_STO_ACC_SAS) + spn_account_name: $(AZTEST_BLOCK_ACC_NAME) + spn_account_endpoint: 'https://$(AZTEST_BLOCK_ACC_NAME).blob.core.windows.net' + client_id: $(AZTEST_CLIENT) + tenant_id: $(AZTEST_TENANT) + client_secret: $(AZTEST_SECRET) + container: $(containerName) + config: $(BLOBFUSE2_CFG) + working_dir: $(WORK_DIR) + mount_dir: $(MOUNT_DIR) + temp_dir: $(TEMP_DIR) + stress_dir: $(BLOBFUSE2_STRESS_DIR) + huge_container: 'testcnt1' + quick_stress: ${{ 
parameters.quick_stress }} + test_key_credential: true + test_sas_credential: true + test_spn_credential: true + test_stream: true + test_azurite: false + stream_config: $(BLOBFUSE2_STREAM_CFG) + stream_filename_config: $(BLOBFUSE2_STREAM_FILENAME_CFG) + sas_credential_config: $(BLOBFUSE2_SAS_CFG) + spn_credential_config: $(BLOBFUSE2_SPN_CFG) + azurite_config: $(BLOBFUSE2_AZURITE_CFG) + distro_name: $(imageName) + verbose_log: ${{ parameters.verbose_log }} + tags: $(tags) + + - template: azure-pipeline-templates/cleanup.yml + parameters: + working_dir: $(WORK_DIR) + mount_dir: $(MOUNT_DIR) + temp_dir: $(TEMP_DIR) + # Ubuntu Tests - job: Set_2 timeoutInMinutes: 300 @@ -330,6 +472,134 @@ stages: mount_dir: $(MOUNT_DIR) temp_dir: $(TEMP_DIR) + - job: Set_2_ARM + timeoutInMinutes: 300 + + strategy: + matrix: + Ubuntu-22-ARM64-ADLS: + imageName: 'blobfuse-ubn22-arm64' + containerName: 'test-cnt-ubn-22-arm64' + adlsSas: $(AZTEST_ADLS_CONT_SAS_UBN_22_ARM) + fuselib: 'libfuse3-dev' + tags: 'fuse3' + pool: + name: "blobfuse-ubn-arm64-pool" + demands: + - ImageOverride -equals $(imageName) + + variables: + - group: NightlyBlobFuse + - name: MOUNT_DIR + value: '$(Pipeline.Workspace)/blob_mnt' + - name: TEMP_DIR + value: '$(Pipeline.Workspace)/blobfuse2_tmp' + - name: BLOBFUSE2_CFG + value: '$(Pipeline.Workspace)/blobfuse2.yaml' + - name: BLOBFUSE2_SAS_CFG + value: '$(Pipeline.Workspace)/blobfuse2_sas_config.yaml' + - name: BLOBFUSE2_SPN_CFG + value: '$(Pipeline.Workspace)/blobfuse2_spn_config.yaml' + - name: BLOBFUSE2_STREAM_CFG + value: '$(Pipeline.Workspace)/blobfuse2_stream.yaml' + - name: BLOBFUSE2_STREAM_FILENAME_CFG + value: '$(Pipeline.Workspace)/blobfuse2_stream_filename.yaml' + - name: BLOBFUSE2_ADLS_CFG + value: '$(Pipeline.Workspace)/blobfuse2.adls.yaml' + - name: BLOBFUSE2_GTEST_CFG + value: '$(Pipeline.Workspace)/connection.yaml' + - name: BLOBFUSE2_AZURITE_CFG + value: '$(Pipeline.Workspace)/blobfuse2_azurite_config.yaml' + - name: BLOBFUSE2_STRESS_DIR + value: '$(Pipeline.Workspace)/blobfuse2_stress' + - name: DECODE_PERCENTS + value: false + - name: GOPATH + value: '$(Pipeline.Workspace)/go' + - name: ROOT_DIR + value: '$(System.DefaultWorkingDirectory)' + - name: WORK_DIR + value: '$(System.DefaultWorkingDirectory)/azure-storage-fuse' + + steps: + - checkout: none + + # Clone the repo + - script: | + git clone https://github.com/Azure/azure-storage-fuse + displayName: 'Checkout Code' + workingDirectory: $(ROOT_DIR) + + # Checkout the branch + - script: | + git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` + displayName: 'Checkout Branch' + workingDirectory: $(WORK_DIR) + + - script: | + sudo apt-get update --fix-missing + sudo apt-get install $(fuselib) gcc -y + displayName: 'Install fuse' + + # ------------------------------------------------------- + # Pull and build the code + - template: 'azure-pipeline-templates/build.yml' + parameters: + working_directory: $(WORK_DIR) + root_dir: $(Pipeline.Workspace) + mount_dir: $(MOUNT_DIR) + temp_dir: $(TEMP_DIR) + gopath: $(GOPATH) + container: $(containerName) + tags: $(tags) + fuselib: $(fuselib) + skip_ut: true # Skip UT because Block Blob set runs it + + # ------------------------------------------------------- + - ${{ if eq(parameters.exhaustive_test, true) }}: + - template: 'azure-pipeline-templates/verbose-tests.yml' + parameters: + service: 'ADLS' + account_type: 'adls' + account_endpoint: 'https://$(AZTEST_ADLS_ACC_NAME).dfs.core.windows.net' + adls: true + account_name: $(AZTEST_ADLS_ACC_NAME) + 
account_key: $(AZTEST_ADLS_KEY) + account_sas: $(adlsSas) + spn_account_name: $(AZTEST_ADLS_ACC_NAME) + spn_account_endpoint: 'https://$(AZTEST_ADLS_ACC_NAME).dfs.core.windows.net' + client_id: $(AZTEST_CLIENT) + tenant_id: $(AZTEST_TENANT) + client_secret: $(AZTEST_SECRET) + container: $(containerName) + config: $(BLOBFUSE2_ADLS_CFG) + working_dir: $(WORK_DIR) + mount_dir: $(MOUNT_DIR) + temp_dir: $(TEMP_DIR) + stress_dir: $(BLOBFUSE2_STRESS_DIR) + huge_container: 'testcnt' + quick_stress: ${{ parameters.quick_stress }} + test_key_credential: true + test_sas_credential: false + test_spn_credential: true + test_stream: true + test_azurite: false + stream_config: $(BLOBFUSE2_STREAM_CFG) + stream_filename_config: $(BLOBFUSE2_STREAM_FILENAME_CFG) + sas_credential_config: $(BLOBFUSE2_SAS_CFG) + spn_credential_config: $(BLOBFUSE2_SPN_CFG) + azurite_config: $(BLOBFUSE2_AZURITE_CFG) + distro_name: $(imageName) + verbose_log: ${{ parameters.verbose_log }} + tags: $(tags) + + - template: azure-pipeline-templates/cleanup.yml + parameters: + working_dir: $(WORK_DIR) + mount_dir: $(MOUNT_DIR) + temp_dir: $(TEMP_DIR) + + - ${{ if eq(parameters.proxy_test, true) }}: # ----------------------------------------------------------- # Ubuntu-20.04 Proxy tests @@ -574,6 +844,12 @@ stages: AgentName: "blobfuse-rhel8_6" containerName: "test-cnt-rhel-86" tags: 'fuse3' + RHEL-9.0: + DistroVer: "RHEL-9.0" + Description: "Red Hat Enterprise Linux 9.0" + AgentName: "blobfuse-rhel9" + containerName: "test-cnt-rhel-9" + tags: 'fuse3' pool: name: "blobfuse-rhel-pool" demands: @@ -599,12 +875,6 @@ stages: value: "/usr/pipeline/workv2/go" steps: - # Go tool installer - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "Install Go Version" - - script: | sudo touch /etc/yum.repos.d/centos.repo sudo sh -c 'echo -e "[centos-extras]\nname=Centos extras - $basearch\nbaseurl=http://mirror.centos.org/centos/7/extras/x86_64\nenabled=1\ngpgcheck=1\ngpgkey=http://centos.org/keys/RPM-GPG-KEY-CentOS-7" > /etc/yum.repos.d/centos.repo' @@ -634,6 +904,8 @@ stages: if [ $(AgentName) == "blobfuse-rhel7_5" ]; then sudo yum-config-manager --save --setopt=rhui-rhel-7-server-dotnet-rhui-rpms.skip_if_unavailable=true sudo yum install git fuse fuse3-libs fuse3-devel fuse3 rh-python36 -y + elif [ $(AgentName) == "blobfuse-rhel9" ]; then + sudo yum install git fuse fuse3-libs fuse3-devel fuse3 python3 -y --nobest --allowerasing else sudo yum install git fuse fuse3-libs fuse3-devel fuse3 python36 -y --nobest --allowerasing fi @@ -681,12 +953,6 @@ stages: value: "/usr/pipeline/workv2/go" steps: - # Go tool installer - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "Install Go Version" - - script: | sudo sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* sudo sed -i 's|baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* @@ -754,12 +1020,6 @@ stages: value: "/usr/pipeline/workv2/go" steps: - # Go tool installer - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "Install Go Version" - - template: 'azure-pipeline-templates/distro-tests.yml' parameters: working_dir: $(WORK_DIR) @@ -782,80 +1042,80 @@ stages: verbose_log: ${{ parameters.verbose_log }} # Debian Tests - - job: Set_8 - timeoutInMinutes: 60 - strategy: - matrix: - Debian-10.0: - DistroVer: "Debian10.0" - Description: "Debian 10" - AgentName: "blobfuse-debian10" - ContainerName: "test-cnt-deb-10" - fuselib: 'fuse libfuse-dev' - tags: 'fuse2' - Debian-11.0: - DistroVer: "Debian11.0" - Description: 
"Debian 11" - AgentName: "blobfuse-debian11" - ContainerName: "test-cnt-deb-11" - fuselib: 'fuse3 libfuse3-dev' - tags: 'fuse3' - - pool: - name: "blobfuse-debian-pool" - demands: - - ImageOverride -equals $(AgentName) - - variables: - - group: NightlyBlobFuse - - name: ROOT_DIR - value: "/usr/pipeline/workv2" - - name: WORK_DIR - value: "/usr/pipeline/workv2/go/src/azure-storage-fuse" - - name: skipComponentGovernanceDetection - value: true - - name: MOUNT_DIR - value: "/usr/pipeline/workv2/blob_mnt" - - name: TEMP_DIR - value: "/usr/pipeline/workv2/temp" - - name: BLOBFUSE2_CFG - value: "/usr/pipeline/workv2/cloudfuse.yaml" - - name: BLOBFUSE2_ADLS_CFG - value: "/home/vsts/workv2/cloudfuse.adls.yaml" - - name: GOPATH - value: "/usr/pipeline/workv2/go" - - steps: - # Go tool installer - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "Install Go Version" - - - template: 'azure-pipeline-templates/distro-tests.yml' - parameters: - working_dir: $(WORK_DIR) - root_dir: $(ROOT_DIR) - temp_dir: $(TEMP_DIR) - mount_dir: $(MOUNT_DIR) - config_path: $(BLOBFUSE2_CFG) - container: $(ContainerName) - blob_account_name: $(NIGHTLY_STO_BLOB_ACC_NAME) - blob_account_key: $(NIGHTLY_STO_BLOB_ACC_KEY) - adls_account_name: $(AZTEST_ADLS_ACC_NAME) - adls_account_key: $(AZTEST_ADLS_KEY) - distro_name: $(AgentName) - tags: $(tags) - fuselib: $(fuselib) - gopath: $(GOPATH) - installStep: - script: | - sudo rm /etc/apt/sources.list.d/azure.list - sudo apt-get update --fix-missing -y - sudo apt-get install $(fuselib) -y - sudo apt-get install build-essential git python3 -y - displayName: 'Install fuse' - verbose_log: ${{ parameters.verbose_log }} + # - job: Set_8 + # timeoutInMinutes: 60 + # strategy: + # matrix: + # Debian-10.0: + # DistroVer: "Debian10.0" + # Description: "Debian 10" + # AgentName: "blobfuse-debian10" + # ContainerName: "test-cnt-deb-10" + # fuselib: 'fuse libfuse-dev' + # tags: 'fuse2' + # Debian-11.0: + # DistroVer: "Debian11.0" + # Description: "Debian 11" + # AgentName: "blobfuse-debian11" + # ContainerName: "test-cnt-deb-11" + # fuselib: 'fuse3 libfuse3-dev' + # tags: 'fuse3' + + # pool: + # name: "blobfuse-debian-pool" + # demands: + # - ImageOverride -equals $(AgentName) + + # variables: + # - group: NightlyBlobFuse + # - name: ROOT_DIR + # value: "/usr/pipeline/workv2" + # - name: WORK_DIR + # value: "/usr/pipeline/workv2/go/src/azure-storage-fuse" + # - name: skipComponentGovernanceDetection + # value: true + # - name: MOUNT_DIR + # value: "/usr/pipeline/workv2/blob_mnt" + # - name: TEMP_DIR + # value: "/usr/pipeline/workv2/temp" + # - name: BLOBFUSE2_CFG + # value: "/usr/pipeline/workv2/blobfuse2.yaml" + # - name: BLOBFUSE2_ADLS_CFG + # value: "/home/vsts/workv2/blobfuse2.adls.yaml" + # - name: GOPATH + # value: "/usr/pipeline/workv2/go" + + # steps: + # # Go tool installer + # - task: GoTool@0 + # inputs: + # version: '1.20.5' + # displayName: "Install Go Version" + + # - template: 'azure-pipeline-templates/distro-tests.yml' + # parameters: + # working_dir: $(WORK_DIR) + # root_dir: $(ROOT_DIR) + # temp_dir: $(TEMP_DIR) + # mount_dir: $(MOUNT_DIR) + # config_path: $(BLOBFUSE2_CFG) + # container: $(ContainerName) + # blob_account_name: $(NIGHTLY_STO_BLOB_ACC_NAME) + # blob_account_key: $(NIGHTLY_STO_BLOB_ACC_KEY) + # adls_account_name: $(AZTEST_ADLS_ACC_NAME) + # adls_account_key: $(AZTEST_ADLS_KEY) + # distro_name: $(AgentName) + # tags: $(tags) + # fuselib: $(fuselib) + # gopath: $(GOPATH) + # installStep: + # script: | + # sudo rm /etc/apt/sources.list.d/azure.list + # 
sudo apt-get update --fix-missing -y + # sudo apt-get install $(fuselib) -y + # sudo apt-get install build-essential git python3 -y + # displayName: 'Install fuse' + # verbose_log: ${{ parameters.verbose_log }} # SUSE Tests - job: Set_9 @@ -893,12 +1153,6 @@ stages: value: "/usr/pipeline/workv2/go" steps: - # Go tool installer - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "Install Go Version" - - template: 'azure-pipeline-templates/distro-tests.yml' parameters: working_dir: $(WORK_DIR) @@ -936,7 +1190,13 @@ stages: ContainerName: "test-cnt-mari-1" fuselib: 'libfuse-dev' tags: 'fuse2' - + Mariner2: + DistroVer: "Mariner2" + Description: "CBL-Mariner2 Linux" + AgentName: "blobfuse-mariner2" + ContainerName: "test-cnt-mari-2" + fuselib: 'libfuse3-dev' + tags: 'fuse3' pool: name: "blobfuse-mariner-pool" demands: @@ -962,12 +1222,6 @@ stages: value: "/usr/pipeline/workv2/go" steps: - # Go tool installer - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "Install Go Version" - - template: 'azure-pipeline-templates/distro-tests.yml' parameters: working_dir: $(WORK_DIR) @@ -986,7 +1240,11 @@ stages: gopath: $(GOPATH) installStep: script: | - sudo tdnf install build-essential git fuse fuse-devel python36 -y + if [ $(AgentName) == "blobfuse-mariner" ]; then + sudo tdnf install gcc build-essential git fuse fuse-devel python36 -y + else + sudo tdnf install gcc build-essential git fuse3 fuse3-devel python3 -y + fi displayName: 'Install fuse' verbose_log: ${{ parameters.verbose_log }} @@ -1039,8 +1297,8 @@ stages: # Install libfuse - script: | - sudo apt-get install make cmake gcc g++ libfuse3-dev fuse3 -y -o Dpkg::Options::="--force-confnew" sudo apt-get update --fix-missing -o Dpkg::Options::="--force-confnew" + sudo apt-get install make cmake gcc g++ libfuse3-dev fuse3 -y -o Dpkg::Options::="--force-confnew" displayName: 'Install Fuse' # Prestart cleanup @@ -1151,6 +1409,72 @@ stages: mount_dir: $(MOUNT_DIR) temp_dir: $(TEMP_DIR) + + - ${{ if eq(parameters.block_cache_validation, true) }}: + - stage: DataValidationBlockCache + jobs: + # Ubuntu Tests + - job: Set_1 + timeoutInMinutes: 300 + strategy: + matrix: + Ubuntu-22: + imageName: 'ubuntu-22.04' + containerName: 'test-cnt-ubn-22' + fuselib: 'libfuse3-dev' + tags: 'fuse3' + pool: + vmImage: $(imageName) + + variables: + - group: NightlyBlobFuse + - name: ROOT_DIR + value: "/usr/pipeline/workv2" + - name: WORK_DIR + value: "/usr/pipeline/workv2/go/src/azure-storage-fuse" + - name: skipComponentGovernanceDetection + value: true + - name: MOUNT_DIR + value: "/usr/pipeline/workv2/blob_mnt" + - name: TEMP_DIR + value: "/usr/pipeline/workv2/temp" + - name: BLOBFUSE2_CFG + value: "/usr/pipeline/workv2/blobfuse2.yaml" + - name: GOPATH + value: "/usr/pipeline/workv2/go" + + steps: + - checkout: none + + - template: 'azure-pipeline-templates/setup.yml' + parameters: + tags: $(tags) + installStep: + script: | + sudo apt-get update --fix-missing + sudo apt-get install $(fuselib) -y + displayName: 'Install fuse' + + - template: 'azure-pipeline-templates/e2e-tests-block-cache.yml' + parameters: + conf_template: azure_key.yaml + config_file: $(BLOBFUSE2_CFG) + container: $(containerName) + idstring: Block_Blob + adls: false + account_name: $(NIGHTLY_STO_BLOB_ACC_NAME) + account_key: $(NIGHTLY_STO_BLOB_ACC_KEY) + account_type: block + account_endpoint: https://$(NIGHTLY_STO_BLOB_ACC_NAME).blob.core.windows.net + distro_name: $(imageName) + quick_test: false + verbose_log: ${{ parameters.verbose_log }} + clone: true + 
stream_direct_test: false + # TODO: These can be removed one day and replace all instances of ${{ parameters.temp_dir }} with $(TEMP_DIR) since it is a global variable + temp_dir: $(TEMP_DIR) + mount_dir: $(MOUNT_DIR) + - ${{ if eq(parameters.data_validation, true) }}: - stage: DataValidationBlob jobs: @@ -1634,7 +1958,7 @@ stages: - task: ShellScript@2 inputs: scriptPath: "$(WORK_DIR)/go_installer.sh" - args: "$(ROOT_DIR)/ 1.20.5" + args: "$(ROOT_DIR)/" displayName: "GoTool Custom Setup" # Downloading Go dependency packages diff --git a/cloudfuse-release.yaml b/cloudfuse-release.yaml index fd322b357..42457f9de 100644 --- a/cloudfuse-release.yaml +++ b/cloudfuse-release.yaml @@ -142,7 +142,107 @@ stages: inputs: artifactName: 'cloudfuse-temp' displayName: 'Publish Artifacts' - # BuildArtifacts end here + + - job: Set_2_ARM + timeoutInMinutes: 120 + strategy: + matrix: + Libfuse3: + vmImage: 'ubuntu-22.04' + fuselib: 'libfuse3-dev' + tags: 'fuse3' + depends: 'fuse3' + container: 'test-cnt-ubn-22-arm64' + AgentName: 'blobfuse-ubn22-arm64' + + pool: + name: "blobfuse-ubn-arm64-pool" + demands: + - ImageOverride -equals $(agentName) + + variables: + - group: NightlyBlobFuse + - name: root_dir + value: '$(System.DefaultWorkingDirectory)' + - name: work_dir + value: '$(System.DefaultWorkingDirectory)/azure-storage-fuse' + + steps: + - checkout: none + - script: | + git clone https://github.com/Azure/azure-storage-fuse + displayName: 'Checkout Code' + workingDirectory: $(root_dir) + + # list commits from past 12hrs + - script: | + git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` + git --no-pager log --since="12 hours ago" --stat + displayName: 'List Commits' + workingDirectory: $(work_dir) + + # install dependencies required for compiling blobfuse + - script: | + sudo apt-get update --fix-missing + sudo apt-get install ruby-dev build-essential pkg-config cmake gcc g++ rpm $(fuselib) -y + sudo gem install fpm -V + displayName: "Installing Dependencies" + + # get glibc version with which build is done + - script: | + ldd --version + displayName: "GLIBC Version" + + # build cloudfuse and generate binary + - template: 'azure-pipeline-templates/build-release.yml' + parameters: + work_dir: $(work_dir) + root_dir: $(root_dir) + unit_test: ${{ parameters.unit_test }} + tags: $(tags) + container: $(container) + + # place the generated binary files & any additional files in appropriate locations + - script: | + mkdir -p pkgDir/usr/bin/ + mkdir -p pkgDir/usr/share/cloudfuse/ + cp azure-storage-fuse/cloudfuse pkgDir/usr/bin/cloudfuse + cp azure-storage-fuse/bfusemon pkgDir/usr/bin/bfusemon + cp azure-storage-fuse/setup/baseConfig.yaml pkgDir/usr/share/cloudfuse/ + cp azure-storage-fuse/sampleFileCacheConfig.yaml pkgDir/usr/share/cloudfuse/ + cp azure-storage-fuse/sampleStreamingConfig.yaml pkgDir/usr/share/cloudfuse/ + mkdir -p pkgDir/etc/rsyslog.d + mkdir -p pkgDir/etc/logrotate.d + cp azure-storage-fuse/setup/11-cloudfuse.conf pkgDir/etc/rsyslog.d + cp azure-storage-fuse/setup/cloudfuse-logrotate pkgDir/etc/logrotate.d/cloudfuse + workingDirectory: $(root_dir) + displayName: 'Accumulate pkg files' + + # using fpm tool for packaging of our binary & performing post-install operations + # for additional information about fpm refer https://fpm.readthedocs.io/en/v1.13.1/ + - script: | + fpm -s dir -t deb -n cloudfuse -C pkgDir/ -v `./pkgDir/usr/bin/cloudfuse --version | cut -d " " -f 3` -d $(depends) \ + --maintainer "Blobfuse v-Team " --url 
"https://github.com/Azure/azure-storage-fuse" \ + --description "An user-space filesystem for interacting with Azure Storage" + mv ./cloudfuse*.deb ./cloudfuse-`./pkgDir/usr/bin/cloudfuse --version | cut -d " " -f 3`-$(tags).arm64.deb + cp ./cloudfuse*.deb $(Build.ArtifactStagingDirectory) + workingDirectory: $(root_dir) + displayName: 'Make deb Package' + + - script: | + fpm -s dir -t rpm -n cloudfuse -C pkgDir/ -v `./pkgDir/usr/bin/cloudfuse --version | cut -d " " -f 3` -d $(depends) \ + --maintainer "Blobfuse v-Team " --url "https://github.com/Azure/azure-storage-fuse" \ + --description "An user-space filesystem for interacting with Azure Storage" + mv ./cloudfuse*.rpm ./cloudfuse-`./pkgDir/usr/bin/cloudfuse --version | cut -d " " -f 3`-$(tags).aarch64.rpm + cp ./cloudfuse*.rpm $(Build.ArtifactStagingDirectory) + workingDirectory: $(root_dir) + displayName: 'Make rpm Package' + + - task: PublishBuildArtifacts@1 + inputs: + artifactName: 'cloudfuse-temp' + displayName: 'Publish Artifacts' +# BuildArtifacts end here - stage: SignArtifacts dependsOn: BuildArtifacts @@ -266,11 +366,6 @@ stages: steps: - checkout: none - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "GoTool Setup" - - script: | git clone https://github.com/Azure/azure-storage-fuse displayName: 'Checkout Code' @@ -279,6 +374,13 @@ stages: git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` displayName: 'Checkout Branch' workingDirectory: $(root_dir)/azure-storage-fuse + + # Custom script to install Go-lang + - task: ShellScript@2 + inputs: + scriptPath: "$(work_dir)/go_installer.sh" + args: "$(root_dir)/" + displayName: "GoTool Custom Setup" # get glibc version with which build is done - script: | @@ -290,7 +392,16 @@ stages: inputs: artifactName: 'cloudfuse-signed' downloadPath: $(root_dir) - itemPattern: cloudfuse-signed/cloudfuse*$(tags)*.deb + itemPattern: cloudfuse-signed/cloudfuse*$(tags)*x86_64.deb + + - script: | + ls -l + result=$(ls -1 | wc -l) + if [ $result -ne 1 ]; then + exit 1 + fi + displayName: 'List Downloaded Package' + workingDirectory: $(root_dir)/cloudfuse-signed - script: | for f in ./cloudfuse*$(tags)*.deb; do mv -v "$f" "${f/-$(tags)./-$(vmImage).}"; done; @@ -365,21 +476,23 @@ stages: steps: - checkout: none - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "GoTool Setup" - - script: | git clone https://github.com/Azure/azure-storage-fuse displayName: 'Checkout Code' workingDirectory: $(root_dir) - + - script: | git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` displayName: 'Checkout Branch' workingDirectory: $(root_dir)/azure-storage-fuse + # Custom script to install Go-lang + - task: ShellScript@2 + inputs: + scriptPath: "$(work_dir)/go_installer.sh" + args: "$(root_dir)/" + displayName: "GoTool Custom Setup" + # get glibc version with which build is done - script: | sudo apt update @@ -392,11 +505,36 @@ stages: inputs: artifactName: 'cloudfuse-signed' downloadPath: $(root_dir) - itemPattern: cloudfuse-signed/cloudfuse*$(tags)*.deb + itemPattern: cloudfuse-signed/cloudfuse*$(tags)*x86_64.deb + + - script: | + ls -l + result=$(ls -1 | wc -l) + if [ $result -ne 1 ]; then + exit 1 + fi + displayName: 'List Downloaded Package' + workingDirectory: $(root_dir)/cloudfuse-signed - script: | for f in ./cloudfuse*$(tags)*.deb; do mv -v "$f" "${f/-$(tags)./-$(vmImage).}"; done; cp ./cloudfuse*$(vmImage)*.deb $(Build.ArtifactStagingDirectory) + if [ $(AgentName) == "blobfuse-ubuntu18" ]; then + echo "Generating for Debian 9/10" + 
f=`ls ./cloudfuse*$(vmImage)*.deb` + cp "$f" $(sed 's:Ubuntu-18.04:Debian-9.0:' <<< "$f") + cp "$f" $(sed 's:Ubuntu-18.04:Debian-10.0:' <<< "$f") + cp ./cloudfuse*Debian-*.deb $(Build.ArtifactStagingDirectory) + ls -l $(Build.ArtifactStagingDirectory) + rm -rf ./cloudfuse*Debian-*.deb + else + echo "Generating for Debian 11" + f=`ls ./cloudfuse*$(vmImage)*.deb` + cp "$f" $(sed 's:Ubuntu-20.04:Debian-11.0:' <<< "$f") + cp ./cloudfuse*Debian-*.deb $(Build.ArtifactStagingDirectory) + ls -l $(Build.ArtifactStagingDirectory) + rm -rf ./cloudfuse*Debian-*.deb + fi displayName: 'Rename Package' workingDirectory: $(root_dir)/cloudfuse-signed @@ -421,29 +559,21 @@ stages: artifactName: 'cloudfuse' displayName: 'Publish Artifacts' - - job: Set_3 + - job: Set_2_ARM timeoutInMinutes: 120 strategy: matrix: - Debian-10.0: - agentName: "blobfuse-debian10" - vmImage: 'Debian-10.0' - fuselib: 'libfuse-dev' - fuse-version: 'fuse' - tags: 'fuse2' - container: 'test-cnt-deb-10' - Debian-11.0: - agentName: "blobfuse-debian11" - vmImage: 'Debian-11.0' + Ubuntu-22-ARM64: + vmImage: 'Ubuntu-22.04' fuse-version: 'fuse3' fuselib: 'libfuse3-dev' tags: 'fuse3' - container: 'test-cnt-deb-11' - + container: 'test-cnt-ubn-22-arm64' + AgentName: "blobfuse-ubn22-arm64" pool: - name: "blobfuse-debian-pool" + name: "blobfuse-ubn-arm64-pool" demands: - - ImageOverride -equals $(agentName) + - ImageOverride -equals $(AgentName) variables: - group: NightlyBlobFuse @@ -459,14 +589,107 @@ stages: steps: - checkout: none - - task: GoTool@0 + - script: | + git clone https://github.com/Azure/azure-storage-fuse + displayName: 'Checkout Code' + workingDirectory: $(root_dir) + + - script: | + git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` + displayName: 'Checkout Branch' + workingDirectory: $(root_dir)/azure-storage-fuse + + # Custom script to install Go-lang + - task: ShellScript@2 inputs: - version: '1.20.5' - displayName: "GoTool Setup" + scriptPath: "$(work_dir)/go_installer.sh" + args: "$(root_dir)/" + displayName: "GoTool Custom Setup" + # get glibc version with which build is done - script: | - sudo apt-get update -y - sudo apt-get install git -y + sudo apt update + sudo apt --fix-broken install + ldd --version + displayName: "GLIBC Version" + + - task: DownloadBuildArtifacts@0 + displayName: 'Download Build Artifacts' + inputs: + artifactName: 'cloudfuse-signed' + downloadPath: $(root_dir) + itemPattern: cloudfuse-signed/cloudfuse*$(tags)*arm64.deb + + - script: | + ls -l + result=$(ls -1 | wc -l) + if [ $result -ne 1 ]; then + exit 1 + fi + displayName: 'List Downloaded Package' + workingDirectory: $(root_dir)/cloudfuse-signed + + - script: | + for f in ./cloudfuse*$(tags)*.deb; do mv -v "$f" "${f/-$(tags)./-$(vmImage).}"; done; + cp ./cloudfuse*$(vmImage)*.deb $(Build.ArtifactStagingDirectory) + displayName: 'Rename Package' + workingDirectory: $(root_dir)/cloudfuse-signed + + - script: | + sudo dpkg --info cloudfuse*.deb + sudo apt-get install $(fuse-version) build-essential -y + sudo dpkg -i cloudfuse*.deb + displayName: 'Install Package' + workingDirectory: $(Build.ArtifactStagingDirectory) + + - template: 'azure-pipeline-templates/release-distro-tests.yml' + parameters: + root_dir: $(root_dir) + work_dir: $(work_dir) + mount_dir: $(mount_dir) + temp_dir: $(temp_dir) + container: $(container) + + # publishing the artifacts generated + - task: PublishBuildArtifacts@1 + inputs: + artifactName: 'cloudfuse' + displayName: 'Publish Artifacts' + + - job: Set_2_AARCH + timeoutInMinutes: 120 + 
strategy: + matrix: + RHEL-9.0: + agentName: "blobfuse-rhel9-arm64" + vmImage: 'RHEL-9.0' + fuselib: 'fuse fuse3-libs fuse3-devel' + fuse-version: 'fuse3' + tags: 'fuse3' + container: 'test-cnt-rhel-9-arm64' + + pool: + name: "blobfuse-ubn-arm64-pool" + demands: + - ImageOverride -equals $(agentName) + + variables: + - group: NightlyBlobFuse + - name: root_dir + value: '$(System.DefaultWorkingDirectory)' + - name: work_dir + value: '$(System.DefaultWorkingDirectory)/azure-storage-fuse' + - name: mount_dir + value: '$(System.DefaultWorkingDirectory)/fusetmp' + - name: temp_dir + value: '$(System.DefaultWorkingDirectory)/fusetmpcache' + + steps: + - checkout: none + + - script: | + sudo yum update -y + sudo yum install git -y displayName: 'Install Git' - script: | @@ -478,7 +701,14 @@ stages: git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` displayName: 'Checkout Branch' workingDirectory: $(root_dir)/azure-storage-fuse - + + # Custom script to install Go-lang + - task: ShellScript@2 + inputs: + scriptPath: "$(work_dir)/go_installer.sh" + args: "$(root_dir)/" + displayName: "GoTool Custom Setup" + # get glibc version with which build is done - script: | ldd --version @@ -489,24 +719,29 @@ stages: inputs: artifactName: 'cloudfuse-signed' downloadPath: $(root_dir) - itemPattern: cloudfuse-signed/cloudfuse*$(tags)*.deb + itemPattern: cloudfuse-signed/cloudfuse*$(tags)*aarch64.rpm - script: | - for f in ./cloudfuse*$(tags)*.deb; do mv -v "$f" "${f/-$(tags)./-$(vmImage).}"; done; - cp ./cloudfuse*$(vmImage)*.deb $(Build.ArtifactStagingDirectory) - if [ $(agentName) == "blobfuse-debian10" ]; then - f=`ls ./cloudfuse*$(vmImage)*.deb` - cp "$f" $(sed 's:Debian-10:Debian-9:' <<< "$f") - cp ./cloudfuse*Debian-9*.deb $(Build.ArtifactStagingDirectory) - rm -rf ./cloudfuse*Debian-9*.deb + ls -l + result=$(ls -1 | wc -l) + if [ $result -ne 1 ]; then + exit 1 fi + displayName: 'List Downloaded Package' + workingDirectory: $(root_dir)/cloudfuse-signed + + - script: | + for f in ./cloudfuse*$(tags)*.rpm; do mv -v "$f" "${f/-$(tags)./-$(vmImage).}"; done; + cp ./cloudfuse*$(vmImage)*.rpm $(Build.ArtifactStagingDirectory) displayName: 'Rename Package' workingDirectory: $(root_dir)/cloudfuse-signed - script: | - sudo dpkg --info cloudfuse*$(vmImage)*.deb - sudo apt-get install $(fuse-version) build-essential -y - sudo dpkg -i ./cloudfuse*$(vmImage)*.deb + sudo sed -i '/^failovermethod=/d' /etc/yum.repos.d/*.repo + sudo rpm -qip cloudfuse*$(vmImage)*.rpm + sudo yum groupinstall "Development Tools" -y + sudo yum install fuse fuse3-libs fuse3-devel fuse3 -y --nobest --allowerasing + sudo rpm -i cloudfuse*$(vmImage)*.rpm displayName: 'Install Package' workingDirectory: $(Build.ArtifactStagingDirectory) @@ -523,6 +758,118 @@ stages: inputs: artifactName: 'cloudfuse' displayName: 'Publish Artifacts' + + # - job: Set_3 + # timeoutInMinutes: 120 + # strategy: + # matrix: + # Debian-10.0: + # agentName: "blobfuse-debian10" + # vmImage: 'Debian-10.0' + # fuselib: 'libfuse-dev' + # fuse-version: 'fuse' + # tags: 'fuse2' + # container: 'test-cnt-deb-10' + # Debian-11.0: + # agentName: "blobfuse-debian11" + # vmImage: 'Debian-11.0' + # fuse-version: 'fuse3' + # fuselib: 'libfuse3-dev' + # tags: 'fuse3' + # container: 'test-cnt-deb-11' + + # pool: + # name: "blobfuse-debian-pool" + # demands: + # - ImageOverride -equals $(agentName) + + # variables: + # - group: NightlyBlobFuse + # - name: root_dir + # value: '$(System.DefaultWorkingDirectory)' + # - name: work_dir + # value: 
'$(System.DefaultWorkingDirectory)/azure-storage-fuse' + # - name: mount_dir + # value: '$(System.DefaultWorkingDirectory)/fusetmp' + # - name: temp_dir + # value: '$(System.DefaultWorkingDirectory)/fusetmpcache' + + # steps: + # - checkout: none + + # - task: GoTool@0 + # inputs: + # version: '1.20.5' + # displayName: "GoTool Setup" + + # - script: | + # sudo apt-get update -y + # sudo apt-get install git -y + # displayName: 'Install Git' + + # - script: | + # git clone https://github.com/Azure/azure-storage-fuse + # displayName: 'Checkout Code' + # workingDirectory: $(root_dir) + + # - script: | + # git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` + # displayName: 'Checkout Branch' + # workingDirectory: $(root_dir)/azure-storage-fuse + + # # get glibc version with which build is done + # - script: | + # ldd --version + # displayName: "GLIBC Version" + + # - task: DownloadBuildArtifacts@0 + # displayName: 'Download Build Artifacts' + # inputs: + # artifactName: 'cloudfuse-signed' + # downloadPath: $(root_dir) + # itemPattern: cloudfuse-signed/cloudfuse*$(tags)*x86_64.deb + + # - script: | + # ls -l + # result=$(ls -1 | wc -l) + # if [ $result -ne 1 ]; then + # exit 1 + # fi + # displayName: 'List Downloaded Package' + # workingDirectory: $(root_dir)/cloudfuse-signed + + # - script: | + # for f in ./cloudfuse*$(tags)*.deb; do mv -v "$f" "${f/-$(tags)./-$(vmImage).}"; done; + # cp ./cloudfuse*$(vmImage)*.deb $(Build.ArtifactStagingDirectory) + # if [ $(agentName) == "blobfuse-debian10" ]; then + # f=`ls ./cloudfuse*$(vmImage)*.deb` + # cp "$f" $(sed 's:Debian-10:Debian-9:' <<< "$f") + # cp ./cloudfuse*Debian-9*.deb $(Build.ArtifactStagingDirectory) + # rm -rf ./cloudfuse*Debian-9*.deb + # fi + # displayName: 'Rename Package' + # workingDirectory: $(root_dir)/cloudfuse-signed + + # - script: | + # sudo dpkg --info cloudfuse*$(vmImage)*.deb + # sudo apt-get install $(fuse-version) build-essential -y + # sudo dpkg -i ./cloudfuse*$(vmImage)*.deb + # displayName: 'Install Package' + # workingDirectory: $(Build.ArtifactStagingDirectory) + + # - template: 'azure-pipeline-templates/release-distro-tests.yml' + # parameters: + # root_dir: $(root_dir) + # work_dir: $(work_dir) + # mount_dir: $(mount_dir) + # temp_dir: $(temp_dir) + # container: $(container) + + # # publishing the artifacts generated + # - task: PublishBuildArtifacts@1 + # inputs: + # artifactName: 'cloudfuse' + # displayName: 'Publish Artifacts' - job: Set_4 timeoutInMinutes: 120 @@ -542,7 +889,14 @@ stages: fuse-version: 'fuse3' tags: 'fuse3' container: 'test-cnt-rhel-86' - + RHEL-9.0: + agentName: "blobfuse-rhel9" + vmImage: 'RHEL-9.0' + fuselib: 'fuse fuse3-libs fuse3-devel' + fuse-version: 'fuse3' + tags: 'fuse3' + container: 'test-cnt-rhel-9' + pool: name: "blobfuse-rhel-pool" demands: @@ -562,11 +916,6 @@ stages: steps: - checkout: none - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "GoTool Setup" - - script: | sudo touch /etc/yum.repos.d/centos.repo sudo sh -c 'echo -e "[centos-extras]\nname=Centos extras - $basearch\nbaseurl=http://mirror.centos.org/centos/7/extras/x86_64\nenabled=1\ngpgcheck=1\ngpgkey=http://centos.org/keys/RPM-GPG-KEY-CentOS-7" > /etc/yum.repos.d/centos.repo' @@ -587,7 +936,14 @@ stages: git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` displayName: 'Checkout Branch' workingDirectory: $(root_dir)/azure-storage-fuse - + + # Custom script to install Go-lang + - task: ShellScript@2 + inputs: + scriptPath: "$(work_dir)/go_installer.sh" + args: 
"$(root_dir)/" + displayName: "GoTool Custom Setup" + # get glibc version with which build is done - script: | ldd --version @@ -598,7 +954,16 @@ stages: inputs: artifactName: 'cloudfuse-signed' downloadPath: $(root_dir) - itemPattern: cloudfuse-signed/cloudfuse*$(tags)*.rpm + itemPattern: cloudfuse-signed/cloudfuse*$(tags)*x86_64.rpm + + - script: | + ls -l + result=$(ls -1 | wc -l) + if [ $result -ne 1 ]; then + exit 1 + fi + displayName: 'List Downloaded Package' + workingDirectory: $(root_dir)/cloudfuse-signed - script: | for f in ./cloudfuse*$(tags)*.rpm; do mv -v "$f" "${f/-$(tags)./-$(vmImage).}"; done; @@ -679,11 +1044,6 @@ stages: steps: - checkout: none - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "GoTool Setup" - - script: | sudo sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* sudo sed -i 's|baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* @@ -704,7 +1064,14 @@ stages: git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` displayName: 'Checkout Branch' workingDirectory: $(root_dir)/azure-storage-fuse - + + # Custom script to install Go-lang + - task: ShellScript@2 + inputs: + scriptPath: "$(work_dir)/go_installer.sh" + args: "$(root_dir)/" + displayName: "GoTool Custom Setup" + # get glibc version with which build is done - script: | ldd --version @@ -715,7 +1082,16 @@ stages: inputs: artifactName: 'cloudfuse-signed' downloadPath: $(root_dir) - itemPattern: cloudfuse-signed/cloudfuse*$(tags)*.rpm + itemPattern: cloudfuse-signed/cloudfuse*$(tags)*x86_64.rpm + + - script: | + ls -l + result=$(ls -1 | wc -l) + if [ $result -ne 1 ]; then + exit 1 + fi + displayName: 'List Downloaded Package' + workingDirectory: $(root_dir)/cloudfuse-signed - script: | for f in ./cloudfuse*$(tags)*.rpm; do mv -v "$f" "${f/-$(tags)./-$(vmImage).}"; done; @@ -780,11 +1156,6 @@ stages: steps: - checkout: none - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "GoTool Setup" - - script: | sudo yum update -y sudo yum install git -y @@ -799,7 +1170,14 @@ stages: git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` displayName: 'Checkout Branch' workingDirectory: $(root_dir)/azure-storage-fuse - + + # Custom script to install Go-lang + - task: ShellScript@2 + inputs: + scriptPath: "$(work_dir)/go_installer.sh" + args: "$(root_dir)/" + displayName: "GoTool Custom Setup" + # get glibc version with which build is done - script: | ldd --version @@ -810,7 +1188,16 @@ stages: inputs: artifactName: 'cloudfuse-signed' downloadPath: $(root_dir) - itemPattern: cloudfuse-signed/cloudfuse*$(tags)*.rpm + itemPattern: cloudfuse-signed/cloudfuse*$(tags)*x86_64.rpm + + - script: | + ls -l + result=$(ls -1 | wc -l) + if [ $result -ne 1 ]; then + exit 1 + fi + displayName: 'List Downloaded Package' + workingDirectory: $(root_dir)/cloudfuse-signed - script: | for f in ./cloudfuse*$(tags)*.rpm; do mv -v "$f" "${f/-$(tags)./-$(vmImage).}"; done; @@ -870,11 +1257,6 @@ stages: steps: - checkout: none - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "GoTool Setup" - - script: | sudo zypper update -y sudo zypper -n install git @@ -889,7 +1271,14 @@ stages: git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` displayName: 'Checkout Branch' workingDirectory: $(root_dir)/azure-storage-fuse - + + # Custom script to install Go-lang + - task: ShellScript@2 + inputs: + scriptPath: "$(work_dir)/go_installer.sh" + args: "$(root_dir)/" + displayName: "GoTool Custom Setup" + 
# get glibc version with which build is done - script: | ldd --version @@ -900,7 +1289,16 @@ stages: inputs: artifactName: 'cloudfuse-signed' downloadPath: $(root_dir) - itemPattern: cloudfuse-signed/cloudfuse*$(tags)*.rpm + itemPattern: cloudfuse-signed/cloudfuse*$(tags)*x86_64.rpm + + - script: | + ls -l + result=$(ls -1 | wc -l) + if [ $result -ne 1 ]; then + exit 1 + fi + displayName: 'List Downloaded Package' + workingDirectory: $(root_dir)/cloudfuse-signed - script: | for f in ./cloudfuse*$(tags)*.rpm; do mv -v "$f" "${f/-$(tags)./-$(vmImage).}"; done; @@ -962,11 +1360,6 @@ stages: steps: - checkout: none - - task: GoTool@0 - inputs: - version: '1.20.5' - displayName: "GoTool Setup" - - script: | sudo tdnf install build-essential git fuse fuse-devel python36 -y displayName: 'Install Git' @@ -980,7 +1373,14 @@ stages: git checkout `echo $(Build.SourceBranch) | cut -d "/" -f 1,2 --complement` displayName: 'Checkout Branch' workingDirectory: $(root_dir)/azure-storage-fuse - + + # Custom script to install Go-lang + - task: ShellScript@2 + inputs: + scriptPath: "$(work_dir)/go_installer.sh" + args: "$(root_dir)/" + displayName: "GoTool Custom Setup" + # get glibc version with which build is done - script: | ldd --version @@ -991,7 +1391,16 @@ stages: inputs: artifactName: 'cloudfuse-signed' downloadPath: $(root_dir) - itemPattern: cloudfuse-signed/cloudfuse*$(tags)*.rpm + itemPattern: cloudfuse-signed/cloudfuse*$(tags)*x86_64.rpm + + - script: | + ls -l + result=$(ls -1 | wc -l) + if [ $result -ne 1 ]; then + exit 1 + fi + displayName: 'List Downloaded Package' + workingDirectory: $(root_dir)/cloudfuse-signed - script: | for f in ./cloudfuse*$(tags)*.rpm; do mv -v "$f" "${f/-$(tags)./-$(vmImage).}"; done; diff --git a/cmd/health-monitor_test.go b/cmd/health-monitor_test.go index 93792e7e0..70bc5162d 100644 --- a/cmd/health-monitor_test.go +++ b/cmd/health-monitor_test.go @@ -36,9 +36,12 @@ package cmd import ( "fmt" + "math/rand" "os" "runtime" + "strconv" "testing" + "time" "cloudfuse/common" "cloudfuse/common/log" @@ -71,6 +74,19 @@ type hmonTestSuite struct { assert *assert.Assertions } +func generateRandomPID() string { + rand.Seed(time.Now().UnixNano()) + var randpid int + for i := 0; i <= 5; i++ { + randpid = rand.Intn(90000) + 10000 + _, err := os.FindProcess(randpid) + if err != nil { + break + } + } + return strconv.Itoa(randpid) +} + func (suite *hmonTestSuite) SetupTest() { suite.assert = assert.New(suite.T()) err := log.SetDefaultLogger("silent", common.LogConfig{Level: common.ELogLevel.LOG_DEBUG()}) @@ -96,7 +112,7 @@ func (suite *hmonTestSuite) TestValidateHmonOptions() { suite.assert.Contains(err.Error(), "pid of cloudfuse process not given") suite.assert.Contains(err.Error(), "config file not given") - pid = "12345" + pid = generateRandomPID() configFile = "config.yaml" err = validateHMonOptions() suite.assert.Nil(err) @@ -134,7 +150,7 @@ func (suite *hmonTestSuite) TestHmonInvalidOptions() { func (suite *hmonTestSuite) TestHmonInvalidConfigFile() { defer suite.cleanupTest() - op, err := executeCommandC(rootCmd, "health-monitor", "--pid=12345", "--config-file=cfgNotFound.yaml") + op, err := executeCommandC(rootCmd, "health-monitor", fmt.Sprintf("--pid=%s", generateRandomPID()), "--config-file=cfgNotFound.yaml") suite.assert.NotNil(err) suite.assert.Contains(op, "invalid config file") // The error message is different on Windows, so need to test with cases @@ -157,7 +173,7 @@ func (suite *hmonTestSuite) TestHmonWithConfigFailure() { suite.assert.Nil(err) confFile.Close() - 
op, err := executeCommandC(rootCmd, "health-monitor", "--pid=12345", fmt.Sprintf("--config-file=%s", cfgFileHmonTest)) + op, err := executeCommandC(rootCmd, "health-monitor", fmt.Sprintf("--pid=%s", generateRandomPID()), fmt.Sprintf("--config-file=%s", cfgFileHmonTest)) suite.assert.NotNil(err) suite.assert.Contains(op, "failed to start health monitor") } @@ -175,13 +191,13 @@ func (suite *hmonTestSuite) TestHmonStopPidEmpty() { } func (suite *hmonTestSuite) TestHmonStopPidInvalid() { - op, err := executeCommandC(rootCmd, "health-monitor", "stop", "--pid=12345") + op, err := executeCommandC(rootCmd, "health-monitor", "stop", fmt.Sprintf("--pid=%s", generateRandomPID())) suite.assert.NotNil(err) suite.assert.Contains(op, "failed to get health monitor pid") } func (suite *hmonTestSuite) TestHmonStopPidFailure() { - err := stop("12345") + err := stop(generateRandomPID()) suite.assert.NotNil(err) } diff --git a/cmd/imports.go b/cmd/import_windows.go similarity index 99% rename from cmd/imports.go rename to cmd/import_windows.go index f0b4f5bba..d93a764a8 100644 --- a/cmd/imports.go +++ b/cmd/import_windows.go @@ -1,3 +1,5 @@ +//go:build windows + /* _____ _____ _____ ____ ______ _____ ------ | | | | | | | | | | | | | diff --git a/component/file_cache/cache_policy_windows.go b/cmd/imports_linux.go similarity index 58% rename from component/file_cache/cache_policy_windows.go rename to cmd/imports_linux.go index bbb4be067..1693d9a2a 100644 --- a/component/file_cache/cache_policy_windows.go +++ b/cmd/imports_linux.go @@ -1,4 +1,4 @@ -//go:build windows +//go:build linux /* _____ _____ _____ ____ ______ _____ ------ @@ -12,6 +12,8 @@ Licensed under the MIT License . Copyright © 2023 Seagate Technology LLC and/or its Affiliates + Copyright © 2020-2023 Microsoft Corporation. All rights reserved. + Author : Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -32,48 +34,15 @@ SOFTWARE */ -package file_cache +package cmd import ( - "os" - "path/filepath" + _ "cloudfuse/component/attr_cache" + _ "cloudfuse/component/azstorage" + _ "cloudfuse/component/block_cache" + _ "cloudfuse/component/file_cache" + _ "cloudfuse/component/libfuse" + _ "cloudfuse/component/loopback" + _ "cloudfuse/component/s3storage" + _ "cloudfuse/component/stream" ) - -// totalSectors walks through all files in the path and gives an estimate of the total number of sectors -// that are being used. Based on https://stackoverflow.com/questions/32482673/how-to-get-directory-total-size -func totalSectors(path string) int64 { - //bytes per sector is hard coded to 4096 bytes since syscall to windows and BytesPerSector for the drive in question is an estimate. 
- // https://devblogs.microsoft.com/oldnewthing/20160427-00/?p=93365 - - var totalSectors int64 - err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error { - if err != nil { - return err - } - if !info.IsDir() { - totalSectors += (info.Size() / sectorSize) - if info.Size()%sectorSize != 0 { - totalSectors++ - } - } - return err - }) - - // TODO: Handle this error properly - if err != nil { - return totalSectors - } - - return totalSectors - -} - -// getUsage providse an estimate of the size on disk in MB for provided directory path string -func getUsage(path string) (float64, error) { - totalSectors := totalSectors(path) - - totalBytes := float64(totalSectors * sectorSize) - totalBytes = totalBytes / MB - - return totalBytes, nil -} diff --git a/cmd/mount.go b/cmd/mount.go index 64891d811..ac0e30f4f 100644 --- a/cmd/mount.go +++ b/cmd/mount.go @@ -91,6 +91,7 @@ type mountOptions struct { Streaming bool `config:"streaming"` AttrCache bool `config:"use-attr-cache"` LibfuseOptions []string `config:"libfuse-options"` + BlockCache bool `config:"block-cache"` } var options mountOptions @@ -281,6 +282,8 @@ var mountCmd = &cobra.Command{ if config.IsSet("streaming") && options.Streaming { pipeline = append(pipeline, "stream") + } else if config.IsSet("block-cache") && options.BlockCache { + pipeline = append(pipeline, "block_cache") } else { pipeline = append(pipeline, "file_cache") } @@ -310,16 +313,16 @@ var mountCmd = &cobra.Command{ } else if v == "allow_other" || v == "allow_other=true" { config.Set("allow-other", "true") } else if strings.HasPrefix(v, "attr_timeout=") { - config.Set("libfuse.attribute-expiration-sec", parameter[1]) + config.Set("lfuse.attribute-expiration-sec", parameter[1]) } else if strings.HasPrefix(v, "entry_timeout=") { - config.Set("libfuse.entry-expiration-sec", parameter[1]) + config.Set("lfuse.entry-expiration-sec", parameter[1]) } else if strings.HasPrefix(v, "negative_timeout=") { - config.Set("libfuse.negative-entry-expiration-sec", parameter[1]) + config.Set("lfuse.negative-entry-expiration-sec", parameter[1]) } else if v == "ro" || v == "ro=true" { config.Set("read-only", "true") } else if v == "allow_root" || v == "allow_root=true" { config.Set("allow-root", "true") - // config.Set("libfuse.default-permission", "700") + // config.Set("lfuse.default-permission", "700") } else if v == "nonempty" { skipNonEmpty = true config.Set("nonempty", "true") @@ -329,21 +332,21 @@ var mountCmd = &cobra.Command{ return fmt.Errorf("failed to parse umask [%s]", err.Error()) } perm := ^uint32(permission) & 777 - config.Set("libfuse.default-permission", fmt.Sprint(perm)) + config.Set("lfuse.default-permission", fmt.Sprint(perm)) } else if strings.HasPrefix(v, "uid=") { val, err := strconv.ParseUint(parameter[1], 10, 32) if err != nil { return fmt.Errorf("failed to parse uid [%s]", err.Error()) } - config.Set("libfuse.uid", fmt.Sprint(val)) + config.Set("lfuse.uid", fmt.Sprint(val)) } else if strings.HasPrefix(v, "gid=") { val, err := strconv.ParseUint(parameter[1], 10, 32) if err != nil { return fmt.Errorf("failed to parse gid [%s]", err.Error()) } - config.Set("libfuse.gid", fmt.Sprint(val)) + config.Set("lfuse.gid", fmt.Sprint(val)) } else if v == "direct_io" || v == "direct_io=true" { - config.Set("libfuse.direct-io", "true") + config.Set("lfuse.direct-io", "true") } else { return errors.New(common.FuseAllowedFlags) } @@ -631,6 +634,10 @@ func init() { config.BindPFlag("streaming", mountCmd.Flags().Lookup("streaming")) 
mountCmd.Flags().Lookup("streaming").Hidden = true + mountCmd.Flags().BoolVar(&options.BlockCache, "block-cache", false, "Enable Block-Cache.") + config.BindPFlag("block-cache", mountCmd.Flags().Lookup("block-cache")) + mountCmd.Flags().Lookup("block-cache").Hidden = true + mountCmd.Flags().BoolVar(&options.AttrCache, "use-attr-cache", true, "Use attribute caching.") config.BindPFlag("use-attr-cache", mountCmd.Flags().Lookup("use-attr-cache")) mountCmd.Flags().Lookup("use-attr-cache").Hidden = true diff --git a/cmd/mount_linux_test.go b/cmd/mount_linux_test.go index 9b64b2a4d..269b93928 100644 --- a/cmd/mount_linux_test.go +++ b/cmd/mount_linux_test.go @@ -331,6 +331,22 @@ func (suite *mountTestSuite) TestStreamAttrCacheOptionsV1() { suite.assert.Contains(op, "failed to initialize new pipeline") } +func (suite *mountTestSuite) TestBlockCacheMountWithoutRO() { + defer suite.cleanupTest() + + mntDir, err := os.MkdirTemp("", "mntdir") + suite.assert.Nil(err) + defer os.RemoveAll(mntDir) + + tempLogDir := "/tmp/templogs_" + randomString(6) + defer os.RemoveAll(tempLogDir) + + op, err := executeCommandC(rootCmd, "mount", mntDir, fmt.Sprintf("--log-file-path=%s", tempLogDir+"/blobfuse2.log"), + "--block-cache", "--use-attr-cache", "--invalidate-on-sync", "--pre-mount-validate", "--basic-remount-check") + suite.assert.NotNil(err) + suite.assert.Contains(op, "filesystem is not mounted in read-only mode") +} + // mount failure test where a libfuse option is incorrect func (suite *mountTestSuite) TestInvalidLibfuseOption() { defer suite.cleanupTest() diff --git a/common/types.go b/common/types.go index 84e217133..963b8deaf 100644 --- a/common/types.go +++ b/common/types.go @@ -47,7 +47,7 @@ import ( // Standard config default values const ( - cloudfuseVersion_ = "2.0.5" + cloudfuseVersion_ = "2.1.0" DefaultMaxLogFileSize = 512 DefaultLogFileCount = 10 diff --git a/common/util.go b/common/util.go index 29f8726e0..764a71dea 100644 --- a/common/util.go +++ b/common/util.go @@ -53,6 +53,9 @@ import ( "gopkg.in/ini.v1" ) +// Sector size of disk +const SectorSize = 4096 + var RootMount bool var ForegroundMount bool diff --git a/common/util_linux.go b/common/util_linux.go index 12ccfb1af..ac0c3a055 100644 --- a/common/util_linux.go +++ b/common/util_linux.go @@ -37,7 +37,13 @@ package common import ( + "bytes" "fmt" + "os" + "os/exec" + "strconv" + "strings" + "syscall" "golang.org/x/sys/unix" ) @@ -57,3 +63,93 @@ func NotifyMountToParent() error { return nil } + +var duPath []string = []string{"/usr/bin/du", "/usr/local/bin/du", "/usr/sbin/du", "/usr/local/sbin/du", "/sbin/du", "/bin/du"} +var selectedDuPath string = "" + +// GetUsage: The current disk usage in MB +func GetUsage(path string) (float64, error) { + var currSize float64 + var out bytes.Buffer + + if selectedDuPath == "" { + selectedDuPath = "-" + for _, dup := range duPath { + _, err := os.Stat(dup) + if err == nil { + selectedDuPath = dup + break + } + } + } + + if selectedDuPath == "-" { + return 0, fmt.Errorf("failed to find du") + } + + // du - estimates file space usage + // https://man7.org/linux/man-pages/man1/du.1.html + // Note: We cannot just pass -BM as a parameter here since it will result in less accurate estimates of the size of the path + // (i.e. du will round up to 1M if the path is smaller than 1M). 
+ cmd := exec.Command(selectedDuPath, "-sh", path) + cmd.Stdout = &out + + err := cmd.Run() + if err != nil { + return 0, err + } + + size := strings.Split(out.String(), "\t")[0] + if size == "0" { + return 0, nil + } + + // some OS's use "," instead of "." that will not work for float parsing - replace it + size = strings.Replace(size, ",", ".", 1) + parsed, err := strconv.ParseFloat(size[:len(size)-1], 64) + if err != nil { + return 0, fmt.Errorf("failed to parse du output") + } + + switch size[len(size)-1] { + case 'K': + currSize = parsed / float64(1024) + case 'M': + currSize = parsed + case 'G': + currSize = parsed * 1024 + case 'T': + currSize = parsed * 1024 * 1024 + } + + return currSize, nil +} + +var currentUID int = -1 + +// GetDiskUsageFromStatfs: Current disk usage of temp path +func GetDiskUsageFromStatfs(path string) (float64, float64, error) { + // We need to compute the disk usage percentage for the temp path + var stat syscall.Statfs_t + err := syscall.Statfs(path, &stat) + if err != nil { + return 0, 0, err + } + + if currentUID == -1 { + currentUID = os.Getuid() + } + + var availableSpace uint64 + if currentUID == 0 { + // Sudo has mounted + availableSpace = stat.Bfree * uint64(stat.Frsize) + } else { + // non Sudo has mounted + availableSpace = stat.Bavail * uint64(stat.Frsize) + } + + totalSpace := stat.Blocks * uint64(stat.Frsize) + usedSpace := float64(totalSpace - availableSpace) + return usedSpace, float64(usedSpace) / float64(totalSpace) * 100, nil +} diff --git a/common/util_test.go b/common/util_test.go index 272e93999..02a2a31ed 100644 --- a/common/util_test.go +++ b/common/util_test.go @@ -224,3 +224,46 @@ func (suite *utilTestSuite) TestIsDriveLetter() { match = IsDriveLetter(path) suite.assert.Equal(false, match) } + +func (suite *utilTestSuite) TestGetUSage() { + pwd, err := os.Getwd() + if err != nil { + return + } + + dirName := filepath.Join(pwd, "util_test") + err = os.Mkdir(dirName, 0777) + suite.assert.Nil(err) + + data := make([]byte, 1024*1024) + err = os.WriteFile(dirName+"/1.txt", data, 0777) + suite.assert.Nil(err) + + err = os.WriteFile(dirName+"/2.txt", data, 0777) + suite.assert.Nil(err) + + usage, err := GetUsage(dirName) + suite.assert.Nil(err) + suite.assert.GreaterOrEqual(int(usage), 2) + suite.assert.LessOrEqual(int(usage), 4) + + _ = os.RemoveAll(dirName) +} + +func (suite *utilTestSuite) TestGetDiskUsage() { + pwd, err := os.Getwd() + if err != nil { + return + } + + dirName := filepath.Join(pwd, "util_test", "a", "b", "c") + err = os.MkdirAll(dirName, 0777) + suite.assert.Nil(err) + + usage, usagePercent, err := GetDiskUsageFromStatfs(dirName) + suite.assert.Nil(err) + suite.assert.NotEqual(usage, 0) + suite.assert.NotEqual(usagePercent, 0) + suite.assert.NotEqual(usagePercent, 100) + _ = os.RemoveAll(filepath.Join(pwd, "util_test")) +} diff --git a/common/util_windows.go b/common/util_windows.go index d12b30a41..f519e5e88 100644 --- a/common/util_windows.go +++ b/common/util_windows.go @@ -34,7 +34,72 @@ package common +import ( + "os" + "path/filepath" + + "golang.org/x/sys/windows" +) + // NotifyMountToParent : Does nothing on Windows func NotifyMountToParent() error { return nil } + +// totalSectors walks through all files in the path and gives an estimate of the total number of sectors +// that are being used. 
Based on https://stackoverflow.com/questions/32482673/how-to-get-directory-total-size +func totalSectors(path string) int64 { + //bytes per sector is hard coded to 4096 bytes since syscall to windows and BytesPerSector for the drive in question is an estimate. + // https://devblogs.microsoft.com/oldnewthing/20160427-00/?p=93365 + + var totalSectors int64 + err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() { + totalSectors += (info.Size() / SectorSize) + if info.Size()%SectorSize != 0 { + totalSectors++ + } + } + return err + }) + + // TODO: Handle this error properly + if err != nil { + return totalSectors + } + + return totalSectors + +} + +// GetUsage: The current disk usage in MB +func GetUsage(path string) (float64, error) { + totalSectors := totalSectors(path) + + totalBytes := float64(totalSectors * SectorSize) + totalBytes = totalBytes / MbToBytes + + return totalBytes, nil +} + +// GetDiskUsageFromStatfs: Current disk usage of temp path +func GetDiskUsageFromStatfs(path string) (float64, float64, error) { + // We need to compute the disk usage percentage for the temp path + var free, total, avail uint64 + + // Get path to the cache + pathPtr, err := windows.UTF16PtrFromString(path) + if err != nil { + panic(err) + } + err = windows.GetDiskFreeSpaceEx(pathPtr, &free, &total, &avail) + if err != nil { + return 0, 0, err + } + + usedSpace := float64(total - avail) + return usedSpace, float64(usedSpace) / float64(total) * 100, nil +} diff --git a/component/azstorage/azauth_test.go b/component/azstorage/azauth_test.go index 97f1b872e..e5f10cd03 100644 --- a/component/azstorage/azauth_test.go +++ b/component/azstorage/azauth_test.go @@ -739,6 +739,33 @@ func (suite *authTestSuite) TestBlockInvalidTokenPathSpn() { _ = stg.SetupPipeline() } +func (suite *authTestSuite) TestBlockSpn() { + defer suite.cleanupTest() + + _ = os.WriteFile("newtoken.txt", []byte("abcdef"), 0777) + defer os.Remove("newtoken.txt") + + stgConfig := AzStorageConfig{ + container: storageTestConfigurationParameters.BlockContainer, + authConfig: azAuthConfig{ + AuthMode: EAuthType.SPN(), + AccountType: EAccountType.BLOCK(), + AccountName: storageTestConfigurationParameters.BlockAccount, + ClientID: storageTestConfigurationParameters.SpnClientId, + TenantID: storageTestConfigurationParameters.SpnTenantId, + ClientSecret: "", + Endpoint: generateEndpoint(false, storageTestConfigurationParameters.BlockAccount, EAccountType.BLOCK()), + OAuthTokenFilePath: "newtoken.txt", + }, + } + assert := assert.New(suite.T()) + stg := NewAzStorageConnection(stgConfig) + if stg == nil { + assert.Fail("TestBlockInvalidSpn : Failed to create Storage object") + } + _ = stg.SetupPipeline() +} + // func (suite *authTestSuite) TestBlockSpn() { // defer suite.cleanupTest() // stgConfig := AzStorageConfig{ diff --git a/component/azstorage/azauthmsi.go b/component/azstorage/azauthmsi.go index 70993dde5..2d28a8a30 100644 --- a/component/azstorage/azauthmsi.go +++ b/component/azstorage/azauthmsi.go @@ -35,7 +35,14 @@ package azstorage import ( + "bytes" "context" + "encoding/json" + "errors" + "fmt" + "os" + "os/exec" + "strings" "time" "cloudfuse/common/log" @@ -43,6 +50,7 @@ import ( "github.com/Azure/azure-storage-azcopy/v10/azbfs" "github.com/Azure/azure-storage-azcopy/v10/common" "github.com/Azure/azure-storage-blob-go/azblob" + "github.com/Azure/go-autorest/autorest/adal" ) // Verify that the Auth implement the correct AzAuth interfaces @@ -77,6 +85,74 @@ func 
(azmsi *azAuthMSI) fetchToken() (*common.OAuthTokenInfo, error) { return oAuthTokenInfo, nil } +// fetchTokenFromCLI : Generates a token using the Az Cli +func (azmsi *azAuthMSI) fetchTokenFromCLI() (*common.OAuthTokenInfo, error) { + resource := "https://storage.azure.com" + if azmsi.config.AuthResource != "" { + resource = azmsi.config.AuthResource + } + + commandLine := "az account get-access-token -o json --resource " + resource + if azmsi.config.TenantID != "" { + commandLine += " --tenant " + azmsi.config.TenantID + } + + cliCmd := exec.CommandContext(context.Background(), "/bin/sh", "-c", commandLine) + cliCmd.Dir = "/bin" + cliCmd.Env = os.Environ() + + var stderr bytes.Buffer + cliCmd.Stderr = &stderr + output, err := cliCmd.Output() + if err != nil { + msg := stderr.String() + var exErr *exec.ExitError + if errors.As(err, &exErr) && exErr.ExitCode() == 127 || strings.HasPrefix(msg, "'az' is not recognized") { + msg = "Azure CLI not found on path" + } + if msg == "" { + msg = err.Error() + } + return nil, fmt.Errorf(msg) + } + + log.Info("azAuthMSI::fetchTokenFromCLI : Successfully fetched token from Azure CLI : %s", output) + t := struct { + AccessToken string `json:"accessToken"` + Authority string `json:"_authority"` + ClientID string `json:"_clientId"` + ExpiresOn string `json:"expiresOn"` + IdentityProvider string `json:"identityProvider"` + IsMRRT bool `json:"isMRRT"` + RefreshToken string `json:"refreshToken"` + Resource string `json:"resource"` + TokenType string `json:"tokenType"` + UserID string `json:"userId"` + }{} + + err = json.Unmarshal(output, &t) + if err != nil { + return nil, err + } + // the Azure CLI's "expiresOn" is local time + _, err = time.ParseInLocation("2006-01-02 15:04:05.999999", t.ExpiresOn, time.Local) + if err != nil { + return nil, fmt.Errorf("error parsing token expiration time %q: %v", t.ExpiresOn, err) + } + + tokenInfo := &common.OAuthTokenInfo{ + Token: adal.Token{ + AccessToken: t.AccessToken, + RefreshToken: t.RefreshToken, + ExpiresOn: json.Number(t.ExpiresOn), + Resource: t.Resource, + Type: t.TokenType, + }, + } + + return tokenInfo, nil +} + type azAuthBlobMSI struct { azAuthMSI } @@ -85,28 +161,69 @@ type azAuthBlobMSI struct { func (azmsi *azAuthBlobMSI) getCredential() interface{} { // Generate the token based on configured inputs - token, err := azmsi.fetchToken() + var token *common.OAuthTokenInfo = nil + var err error = nil + norefresh := false + + msi_endpoint := os.Getenv("MSI_ENDPOINT") + if strings.Contains(msi_endpoint, "127.0.0.1:") { + // this might be AML workspace so try to get token using CLI + log.Info("azAuthBlobMSI::getCredential : Potential AML workspace detected") + token, err = azmsi.fetchTokenFromCLI() + if err != nil { + log.Err("azAuthBlobMSI::getCredential : %s", err.Error()) + } else if token != nil { + norefresh = true + } + } + + if token == nil { + log.Debug("azAuthBlobMSI::getCredential : Going for conventional fetchToken") + token, err = azmsi.fetchToken() + } + if err != nil { // fmt.Println(token.AccessToken) log.Err("azAuthBlobMSI::getCredential : Failed to get credential [%s]", err.Error()) return nil } - // Using token create the credential object, here also register a call back which refreshes the token - tc := azblob.NewTokenCredential(token.AccessToken, func(tc azblob.TokenCredential) time.Duration { - newToken, err := token.Refresh(context.Background()) - if err != nil { - log.Err("azAuthBlobMSI::getCredential : Failed to refresh token [%s]", err.Error()) - return 0 - } + var tc 
azblob.TokenCredential + if norefresh { + log.Info("azAuthBlobMSI::getCredential : MSI Token over CLI retrieved %s (%d)", token.AccessToken, token.Expires()) + // We are running in cli mode so token can not be refreshed, on expiry just get the new token + tc = azblob.NewTokenCredential(token.AccessToken, func(tc azblob.TokenCredential) time.Duration { + newToken, err := azmsi.fetchTokenFromCLI() + if err != nil { + log.Err("azAuthBlobMSI::getCredential : Failed to refresh token [%s]", err.Error()) + return 0 + } + + // set the new token value + tc.SetToken(newToken.AccessToken) + log.Debug("azAuthBlobMSI::getCredential : MSI Token retrieved %s (%d)", newToken.AccessToken, newToken.Expires()) - // set the new token value - tc.SetToken(newToken.AccessToken) - log.Debug("azAuthBlobMSI::getCredential : MSI Token retrieved %s (%d)", newToken.AccessToken, newToken.Expires()) + // Get the next token slightly before the current one expires + return time.Until(newToken.Expires()) - 10*time.Second + }) + } else { + log.Info("azAuthBlobMSI::getCredential : MSI Token retrieved %s (%d)", token.AccessToken, token.Expires()) + // Using token create the credential object, here also register a call back which refreshes the token + tc = azblob.NewTokenCredential(token.AccessToken, func(tc azblob.TokenCredential) time.Duration { + newToken, err := token.Refresh(context.Background()) + if err != nil { + log.Err("azAuthBlobMSI::getCredential : Failed to refresh token [%s]", err.Error()) + return 0 + } - // Get the next token slightly before the current one expires - return time.Until(newToken.Expires()) - 10*time.Second - }) + // set the new token value + tc.SetToken(newToken.AccessToken) + log.Debug("azAuthBlobMSI::getCredential : MSI Token retrieved %s (%d)", newToken.AccessToken, newToken.Expires()) + + // Get the next token slightly before the current one expires + return time.Until(newToken.Expires()) - 10*time.Second + }) + } return tc } @@ -118,28 +235,69 @@ type azAuthBfsMSI struct { // GetCredential : Get MSI based credentials for datalake func (azmsi *azAuthBfsMSI) getCredential() interface{} { // Generate the token based on configured inputs - token, err := azmsi.fetchToken() + var token *common.OAuthTokenInfo = nil + var err error = nil + norefresh := false + + msi_endpoint := os.Getenv("MSI_ENDPOINT") + if strings.Contains(msi_endpoint, "127.0.0.1:") { + // this might be AML workspace so try to get token using CLI + log.Info("azAuthBfsMSI::getCredential : Potential AML workspace detected") + token, err = azmsi.fetchTokenFromCLI() + if err != nil { + log.Err("azAuthBfsMSI::getCredential : %s", err.Error()) + } else if token != nil { + norefresh = true + } + } + + if token == nil { + log.Debug("azAuthBfsMSI::getCredential : Going for conventional fetchToken") + token, err = azmsi.fetchToken() + } + if err != nil { // fmt.Println(token.AccessToken) log.Err("azAuthBfsMSI::getCredential : Failed to get credential [%s]", err.Error()) return nil } - // Using token create the credential object, here also register a call back which refreshes the token - tc := azbfs.NewTokenCredential(token.AccessToken, func(tc azbfs.TokenCredential) time.Duration { - newToken, err := token.Refresh(context.Background()) - if err != nil { - log.Err("azAuthBfsMSI::getCredential : Failed to refresh token [%s]", err.Error()) - return 0 - } + var tc azbfs.TokenCredential + if norefresh { + log.Info("azAuthBfsMSI::getCredential : MSI Token over CLI retrieved %s (%d)", token.AccessToken, token.Expires()) + // We are running in cli 
mode so token can not be refreshed, on expiry just get the new token + tc = azbfs.NewTokenCredential(token.AccessToken, func(tc azbfs.TokenCredential) time.Duration { + newToken, err := azmsi.fetchTokenFromCLI() + if err != nil { + log.Err("azAuthBfsMSI::getCredential : Failed to refresh token [%s]", err.Error()) + return 0 + } + + // set the new token value + tc.SetToken(newToken.AccessToken) + log.Debug("azAuthBfsMSI::getCredential : MSI Token retrieved %s (%d)", newToken.AccessToken, newToken.Expires()) - // set the new token value - tc.SetToken(newToken.AccessToken) - log.Debug("azAuthBfsMSI::getCredential : MSI Token retrieved %s (%d)", newToken.AccessToken, newToken.Expires()) + // Get the next token slightly before the current one expires + return time.Until(newToken.Expires()) - 10*time.Second + }) + } else { + log.Info("azAuthBfsMSI::getCredential : MSI Token retrieved %s (%d)", token.AccessToken, token.Expires()) + // Using token create the credential object, here also register a call back which refreshes the token + tc = azbfs.NewTokenCredential(token.AccessToken, func(tc azbfs.TokenCredential) time.Duration { + newToken, err := token.Refresh(context.Background()) + if err != nil { + log.Err("azAuthBfsMSI::getCredential : Failed to refresh token [%s]", err.Error()) + return 0 + } - // Get the next token slightly before the current one expires - return time.Until(newToken.Expires()) - 10*time.Second - }) + // set the new token value + tc.SetToken(newToken.AccessToken) + log.Debug("azAuthBfsMSI::getCredential : MSI Token retrieved %s (%d)", newToken.AccessToken, newToken.Expires()) + + // Get the next token slightly before the current one expires + return time.Until(newToken.Expires()) - 10*time.Second + }) + } return tc } diff --git a/component/azstorage/azstorage.go b/component/azstorage/azstorage.go index 195fc95ec..242c98a74 100644 --- a/component/azstorage/azstorage.go +++ b/component/azstorage/azstorage.go @@ -180,8 +180,7 @@ func (az *AzStorage) Start(ctx context.Context) error { log.Debug("Starting azstorage stats collector") // This is a workaround right now to disable the input watcher thread which continuously monitors below config to change - // Running this thread continuously increases the CPU usage by 5% even when there is no activity on blobfuse2 mount path - // Lifecycle manager init is commented in the "blobfuse2-cpu-usage" branch. Blobfuse2 imports azcopy from this branch. 
+ // Running this thread continuously increases the CPU usage by 5% even when there is no activity on cloudfuse mount path azcopyCommon.GetLifecycleMgr().EnableInputWatcher() return nil diff --git a/component/azstorage/block_blob.go b/component/azstorage/block_blob.go index 1747cfe93..098a8ca73 100644 --- a/component/azstorage/block_blob.go +++ b/component/azstorage/block_blob.go @@ -774,7 +774,9 @@ func (bb *BlockBlob) ReadBuffer(name string, offset int64, len int64) ([]byte, e func (bb *BlockBlob) ReadInBuffer(name string, offset int64, len int64, data []byte) error { // log.Trace("BlockBlob::ReadInBuffer : name %s", name) blobURL := bb.getBlobURL(name) - err := azblob.DownloadBlobToBuffer(context.Background(), blobURL, offset, len, data, bb.downloadOptions) + opt := bb.downloadOptions + opt.BlockSize = len + err := azblob.DownloadBlobToBuffer(context.Background(), blobURL, offset, len, data, opt) if err != nil { e := storeBlobErrToErr(err) diff --git a/component/block_cache/block_cache_linux.go b/component/block_cache/block_cache_linux.go new file mode 100644 index 000000000..939bf595a --- /dev/null +++ b/component/block_cache/block_cache_linux.go @@ -0,0 +1,819 @@ +//go:build linux + +/* + _____ _____ _____ ____ ______ _____ ------ + | | | | | | | | | | | | | + | | | | | | | | | | | | | + | --- | | | | |-----| |---- | | |-----| |----- ------ + | | | | | | | | | | | | | + | ____| |_____ | ____| | ____| | |_____| _____| |_____ |_____ + + + Licensed under the MIT License . + + Copyright © 2020-2023 Microsoft Corporation. All rights reserved. + Author : + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE +*/ + +package block_cache + +import ( + "container/list" + "context" + "fmt" + "io" + "os" + "path/filepath" + "sync" + + "cloudfuse/common" + "cloudfuse/common/config" + "cloudfuse/common/log" + "cloudfuse/internal" + "cloudfuse/internal/handlemap" + + "github.com/vibhansa-msft/tlru" +) + +/* NOTES: + - Component shall have a structure which inherits "internal.BaseComponent" to participate in pipeline + - Component shall register a name and its constructor to participate in pipeline (add by default by generator) + - Order of calls : Constructor -> Configure -> Start ..... 
-> Stop + - To read any new setting from config file follow the Configure method default comments +*/ + +// Common structure for Component +type BlockCache struct { + internal.BaseComponent + + blockSize uint64 // Size of each block to be cached + memSize uint64 // Mem size to be used for caching at the startup + tmpPath string // Disk path where these blocks will be cached + diskSize uint64 // Size of disk space allocated for the caching + diskTimeout uint32 // Timeout for which disk blocks will be cached + workers uint32 // Number of threads working to fetch the blocks + prefetch uint32 // Number of blocks to be prefetched + diskPolicy *tlru.TLRU // Disk cache eviction policy + blockPool *BlockPool // Pool of blocks + threadPool *ThreadPool // Pool of threads + fileLocks *common.LockMap // Locks for each file_blockid to avoid multiple threads to fetch same block + fileNodeMap sync.Map // Map holding files that are there in our cache + maxDiskUsageHit bool // Flag to indicate if we have hit max disk usage + noPrefetch bool // Flag to indicate if prefetch is disabled + prefetchOnOpen bool // Start prefetching on file open call instead of waiting for first read +} + +// Structure defining your config parameters +type BlockCacheOptions struct { + BlockSize uint64 `config:"block-size-mb" yaml:"block-size-mb,omitempty"` + MemSize uint64 `config:"mem-size-mb" yaml:"mem-size-mb,omitempty"` + TmpPath string `config:"path" yaml:"path,omitempty"` + DiskSize uint64 `config:"disk-size-mb" yaml:"disk-size-mb,omitempty"` + DiskTimeout uint32 `config:"disk-timeout-sec" yaml:"timeout-sec,omitempty"` + PrefetchCount uint32 `config:"prefetch" yaml:"prefetch,omitempty"` + Workers uint32 `config:"parallelism" yaml:"parallelism,omitempty"` + PrefetchOnOpen bool `config:"prefetch-on-open" yaml:"prefetch-on-open,omitempty"` +} + +const ( + compName = "block_cache" + defaultTimeout = 120 + MAX_POOL_USAGE uint32 = 80 + MIN_POOL_USAGE uint32 = 50 + MIN_PREFETCH = 5 + MIN_RANDREAD = 10 + MAX_FAIL_CNT = 3 +) + +// Verification to check satisfaction criteria with Component Interface +var _ internal.Component = &BlockCache{} + +func (bc *BlockCache) Name() string { + return compName +} + +func (bc *BlockCache) SetName(name string) { + bc.BaseComponent.SetName(name) +} + +func (bc *BlockCache) SetNextComponent(nc internal.Component) { + bc.BaseComponent.SetNextComponent(nc) +} + +// Start : Pipeline calls this method to start the component functionality +// +// this shall not Block the call otherwise pipeline will not start +func (bc *BlockCache) Start(ctx context.Context) error { + log.Trace("BlockCache::Start : Starting component %s", bc.Name()) + + // Start the thread pool and keep it ready for download + bc.threadPool.Start() + + // If disk caching is enabled then start the disk eviction policy + if bc.tmpPath != "" { + err := bc.diskPolicy.Start() + if err != nil { + log.Err("BlockCache::Start : failed to start diskpolicy [%s]", err.Error()) + return fmt.Errorf("failed to start disk-policy for block-cache") + } + } + + return nil +} + +// Stop : Stop the component functionality and kill all threads started +func (bc *BlockCache) Stop() error { + log.Trace("BlockCache::Stop : Stopping component %s", bc.Name()) + + // Wait for thread pool to stop + bc.threadPool.Stop() + + // Clear the disk cache on exit + if bc.tmpPath != "" { + _ = bc.diskPolicy.Stop() + _ = bc.TempCacheCleanup() + } + + return nil +} + +// TempCacheCleanup cleans up the local cached contents +func (bc *BlockCache) TempCacheCleanup() error { + if 
bc.tmpPath == "" {
+		return nil
+	}
+
+	log.Info("BlockCache::TempCacheCleanup : Cleaning up temp directory %s", bc.tmpPath)
+
+	dirents, err := os.ReadDir(bc.tmpPath)
+	if err != nil {
+		log.Err("BlockCache::TempCacheCleanup : Failed to list directory %s [%v]", bc.tmpPath, err.Error())
+		return nil
+	}
+
+	for _, entry := range dirents {
+		os.RemoveAll(filepath.Join(bc.tmpPath, entry.Name()))
+	}
+
+	return nil
+}
+
+// Configure : Pipeline will call this method after the constructor so that you can read the config and initialize yourself
+//
+//	Return failure if any config is not valid, to exit the process
+func (bc *BlockCache) Configure(_ bool) error {
+	log.Trace("BlockCache::Configure : %s", bc.Name())
+
+	readonly := false
+	err := config.UnmarshalKey("read-only", &readonly)
+	if err != nil {
+		log.Err("BlockCache::Configure : config error [unable to obtain read-only]")
+		return fmt.Errorf("BlockCache: unable to obtain read-only")
+	}
+
+	// Currently only read-only mode is supported
+	if !readonly {
+		log.Err("BlockCache::Configure : config error [filesystem is not mounted in read-only mode]")
+		return fmt.Errorf("BlockCache: filesystem is not mounted in read-only mode")
+	}
+
+	conf := BlockCacheOptions{}
+	err = config.UnmarshalKey(bc.Name(), &conf)
+	if err != nil {
+		log.Err("BlockCache::Configure : config error [invalid config attributes]")
+		return fmt.Errorf("BlockCache: config error [invalid config attributes]")
+	}
+
+	bc.blockSize = uint64(16) * _1MB
+	if config.IsSet(compName + ".block-size-mb") {
+		bc.blockSize = conf.BlockSize * _1MB
+	}
+
+	bc.memSize = uint64(4192) * _1MB
+	if config.IsSet(compName + ".mem-size-mb") {
+		bc.memSize = conf.MemSize * _1MB
+	}
+
+	bc.diskSize = uint64(4192)
+	if config.IsSet(compName + ".disk-size-mb") {
+		bc.diskSize = conf.DiskSize
+	}
+	bc.diskTimeout = defaultTimeout
+	if config.IsSet(compName + ".disk-timeout-sec") {
+		bc.diskTimeout = conf.DiskTimeout
+	}
+
+	bc.prefetchOnOpen = conf.PrefetchOnOpen
+	bc.prefetch = MIN_PREFETCH
+	bc.noPrefetch = false
+
+	if config.IsSet(compName + ".prefetch") {
+		bc.prefetch = conf.PrefetchCount
+		if bc.prefetch == 0 {
+			bc.noPrefetch = true
+		} else if conf.PrefetchCount <= (MIN_PREFETCH * 2) {
+			log.Err("BlockCache::Configure : Prefetch count cannot be less than %v", (MIN_PREFETCH*2)+1)
+			return fmt.Errorf("BlockCache: invalid config for prefetch count")
+		}
+	}
+
+	bc.maxDiskUsageHit = false
+
+	bc.workers = 128
+	if config.IsSet(compName + ".parallelism") {
+		bc.workers = conf.Workers
+	}
+
+	bc.tmpPath = ""
+	if conf.TmpPath != "" {
+		bc.tmpPath = common.ExpandPath(conf.TmpPath)
+
+		// Create the tmp-path if it does not exist yet
+		_, err = os.Stat(bc.tmpPath)
+		if os.IsNotExist(err) {
+			log.Info("BlockCache: config error [tmp-path does not exist. 
attempting to create tmp-path.]") + err := os.Mkdir(bc.tmpPath, os.FileMode(0755)) + if err != nil { + log.Err("BlockCache: config error creating directory after clean [%s]", err.Error()) + return fmt.Errorf("config error in %s [%s]", bc.Name(), err.Error()) + } + } + } + + log.Info("BlockCache::Configure : block size %v, mem size %v, worker %v, prefeth %v, disk path %v, max size %vMB, disk timeout %v", + bc.blockSize, bc.memSize, bc.workers, bc.prefetch, bc.tmpPath, bc.diskSize, bc.diskTimeout) + + bc.blockPool = NewBlockPool(bc.blockSize, bc.memSize) + if bc.blockPool == nil { + log.Err("BlockCache::Configure : fail to init Block pool") + return fmt.Errorf("BlockCache: failed to init Block pool") + } + + bc.threadPool = newThreadPool(bc.workers, bc.download) + if bc.threadPool == nil { + log.Err("BlockCache::Configure : fail to init thread pool") + return fmt.Errorf("BlockCache: failed to init thread pool") + } + + if bc.tmpPath != "" { + bc.diskPolicy, err = tlru.New(uint32((bc.diskSize*_1MB)/bc.blockSize), bc.diskTimeout, bc.diskEvict, 60, bc.checkDiskUsage) + if err != nil { + log.Err("BlockCache::Configure : fail to create LRU for memory nodes [%s]", err.Error()) + return fmt.Errorf("BlockCache: fail to create LRU for memory nodes") + } + } + + return nil +} + +// OpenFile: Create a handle for the file user has requested to open +func (bc *BlockCache) OpenFile(options internal.OpenFileOptions) (*handlemap.Handle, error) { + log.Trace("BlockCache::OpenFile : name=%s, flags=%d, mode=%s", options.Name, options.Flags, options.Mode) + + attr, err := bc.NextComponent().GetAttr(internal.GetAttrOptions{Name: options.Name}) + if err != nil { + log.Err("BlockCache::OpenFile : Failed to get attr of %s [%s]", options.Name, err.Error()) + return nil, err + } + + handle := handlemap.NewHandle(options.Name) + handle.Size = attr.Size + handle.Mtime = attr.Mtime + + // Set next offset to download as 0 + // We may not download this if first read starts with some other offset + handle.SetValue("#", (uint64)(0)) + + // Allocate a block pool object for this handle + // Actual linked list to hold the nodes + handle.Buffers = &handlemap.Buffers{ + Cooked: list.New(), // List to hold free blocks + Cooking: list.New(), // List to hold blocks still under download + } + + if handle.Size < int64(bc.blockSize) { + // File is small and can fit in one block itself + _ = bc.refreshBlock(handle, 0, false) + } else if bc.prefetchOnOpen && !bc.noPrefetch { + // Prefetch to start on open + _ = bc.startPrefetch(handle, 0, false) + } + + return handle, nil +} + +// CloseFile: File is closed by application so release all the blocks and submit back to blockPool +func (bc *BlockCache) CloseFile(options internal.CloseFileOptions) error { + log.Trace("BlockCache::CloseFile : name=%s, handle=%d", options.Handle.Path, options.Handle.ID) + + // Release the blocks that are in use and wipe out handle map + options.Handle.Cleanup() + + // Release the buffers which are still under download after they have been written + blockList := options.Handle.Buffers.Cooking + node := blockList.Front() + for ; node != nil; node = blockList.Front() { + // Due to prefetch there might be some downloads still going on + block := blockList.Remove(node).(*Block) + + // Wait for download to complete and then free up this block + <-block.state + block.ReUse() + bc.blockPool.Release(block) + } + options.Handle.Buffers.Cooking = nil + + // Release the blocks that are ready to be reused + blockList = options.Handle.Buffers.Cooked + node = 
blockList.Front() + for ; node != nil; node = blockList.Front() { + block := blockList.Remove(node).(*Block) + block.ReUse() + bc.blockPool.Release(block) + } + options.Handle.Buffers.Cooked = nil + + return nil +} + +// ReadInBuffer: Read the file into a buffer +func (bc *BlockCache) ReadInBuffer(options internal.ReadInBufferOptions) (int, error) { + if options.Offset >= options.Handle.Size { + // EOF reached so early exit + return 0, io.EOF + } + + // As of now we allow only one operation on a handle at a time + // This simplifies the logic of block-cache otherwise we will have to handle + // a lot of race conditions and logic becomes complex and sub-performant + options.Handle.Lock() + defer options.Handle.Unlock() + + // Keep getting next blocks until you read the request amount of data + dataRead := int(0) + for dataRead < len(options.Data) { + block, err := bc.getBlock(options.Handle, uint64(options.Offset)) + if err != nil { + if err != io.EOF { + log.Err("BlockCache::ReadInBuffer : Failed to get Block %v=>%s offset %v [%v]", options.Handle.ID, options.Handle.Path, options.Offset, err.Error()) + } + return dataRead, err + } + + // Copy data from this block to user buffer + readOffset := uint64(options.Offset) - block.offset + bytesRead := copy(options.Data[dataRead:], block.data[readOffset:]) + + // Move offset forward in case we need to copy more data + options.Offset += int64(bytesRead) + dataRead += bytesRead + } + + return dataRead, nil +} + +// getBlock: From offset generate the Block index and get the Block corrosponding to it +/* Base logic of getBlock: +Check if the given block is already available or not +if not + if this is the first read for this file start prefetching of blocks from given offset + if this is not first read, consider this to be a random read case and start prefetch from given offset + once the random read count reaches a limit, this prefetching will be turned off + in either case this prefetching will add the block index to the map + so search the map again now +Once block is available +if you are first reader of this block + its time to prefetch next block(s) based on how much we can prefetch + Once you queue up the required prefetch mark this block as open to read + so that others can come and freely read this block + First reader here has responsibility to remove an old used block and lineup download for next blocks +Return this block once prefetch is queued and block is marked open for all +*/ +func (bc *BlockCache) getBlock(handle *handlemap.Handle, readoffset uint64) (*Block, error) { + if readoffset >= uint64(handle.Size) { + return nil, io.EOF + } + + // Check the given block index is already available or not + index := bc.getBlockIndex(readoffset) + node, found := handle.GetValue(fmt.Sprintf("%v", index)) + if !found { + // If this is the first read request then prefetch all required nodes + val, _ := handle.GetValue("#") + if !bc.noPrefetch && val.(uint64) == 0 { + log.Debug("BlockCache::getBlock : Starting the prefetch %v=>%s (offset %v, index %v)", handle.ID, handle.Path, readoffset, index) + + // This is the first read for this file handle so start prefetching all the nodes + err := bc.startPrefetch(handle, index, false) + if err != nil && err != io.EOF { + log.Err("BlockCache::getBlock : Unable to start prefetch %v=>%s (offset %v, index %v) [%s]", handle.ID, handle.Path, readoffset, index, err.Error()) + return nil, err + } + } else { + // This is a case of random read so increment the random read count + handle.OptCnt++ + + 
log.Debug("BlockCache::getBlock : Unable to get block %v=>%s (offset %v, index %v) Random %v", handle.ID, handle.Path, readoffset, index, handle.OptCnt) + + // This block is not present even after prefetch so lets download it now + err := bc.startPrefetch(handle, index, false) + if err != nil && err != io.EOF { + log.Err("BlockCache::getBlock : Unable to start prefetch %v=>%s (offset %v, index %v) [%s]", handle.ID, handle.Path, readoffset, index, err.Error()) + return nil, err + } + } + + // This node was not found so above logic should have queued it up, retry searching now + node, found = handle.GetValue(fmt.Sprintf("%v", index)) + if !found { + log.Err("BlockCache::getBlock : Failed to get the required block %v=>%s (offset %v, index %v)", handle.ID, handle.Path, readoffset, index) + return nil, fmt.Errorf("not able to find block immediately after scheudling") + } + } + + // We have the block now which we wish to read + block := node.(*Block) + + // Wait for this block to complete the download + t := int(0) + t = <-block.state + + if t == 1 { + // Download complete and you are first reader of this block + if handle.OptCnt <= MIN_RANDREAD { + // So far this file has been read sequentially so prefetch more + val, _ := handle.GetValue("#") + if int64(val.(uint64)*bc.blockSize) < handle.Size { + _ = bc.startPrefetch(handle, val.(uint64), true) + } + } + + // This block was moved to in-process queue as download is complete lets move it back to normal queue + _ = handle.Buffers.Cooking.Remove(block.node) + block.node = handle.Buffers.Cooked.PushBack(block) + + // Mark this block is now open for everyone to read and process + // Once unblocked and moved to original queue, any instance can delete this block to reuse as well + block.Unblock() + } + + return block, nil +} + +// getBlockIndex: From offset get the block index +func (bc *BlockCache) getBlockIndex(offset uint64) uint64 { + return offset / bc.blockSize +} + +// startPrefetch: Start prefetchign the blocks from given offset. Same method is used to download currently required block as well +func (bc *BlockCache) startPrefetch(handle *handlemap.Handle, index uint64, prefetch bool) error { + // Calculate how many buffers we have in free and in-process queue + currentCnt := handle.Buffers.Cooked.Len() + handle.Buffers.Cooking.Len() + cnt := uint32(0) + + if handle.OptCnt > MIN_RANDREAD { + // This handle has been read randomly and we have reached the threshold to declare a random read case + + if currentCnt > MIN_PREFETCH { + // As this file is in random read mode now, release the excess buffers. 
Just keep 5 buffers for it to work + log.Debug("BlockCache::startPrefetch : Cleanup excessive blocks %v=>%s index %v", handle.ID, handle.Path, index) + + // As this is random read move all in process blocks to free list + nodeList := handle.Buffers.Cooking + currentCnt = nodeList.Len() + node := nodeList.Front() + + for i := 0; node != nil && i < currentCnt; node = nodeList.Front() { + // Test whether this block is already downloaded or still under download + block := handle.Buffers.Cooking.Remove(node).(*Block) + block.node = nil + i++ + + select { + case <-block.state: + // As we are first reader of this block here its important to unblock any future readers on this block + block.Unblock() + + // Block is downloaded so it's safe to ready it for reuse + block.node = handle.Buffers.Cooked.PushBack(block) + + default: + // Block is still under download so can not reuse this + block.node = handle.Buffers.Cooking.PushBack(block) + } + } + + // Now remove excess blocks from cooked list + nodeList = handle.Buffers.Cooked + currentCnt = nodeList.Len() + node = nodeList.Front() + + for ; node != nil && currentCnt > MIN_PREFETCH; node = nodeList.Front() { + block := node.Value.(*Block) + _ = nodeList.Remove(node) + + // Remove entry of this block from map so that no one can find it + handle.RemoveValue(fmt.Sprintf("%v", block.id)) + block.node = nil + + // Submit this block back to pool for reuse + block.ReUse() + bc.blockPool.Release(block) + + currentCnt-- + } + } + // As we were asked to download a block, for random read case download only the requested block + // This is where prefetching is blocked now as we download just the block which is requested + cnt = 1 + } else { + // This handle is having sequential reads so far + // Allocate more buffers if required until we hit the prefetch count limit + for ; currentCnt < int(bc.prefetch) && cnt < MIN_PREFETCH; currentCnt++ { + block := bc.blockPool.TryGet() + if block != nil { + block.node = handle.Buffers.Cooked.PushFront(block) + cnt++ + } + } + + // If no new buffers were allocated then we have all buffers allocated to this handle already + // time to switch to a sliding window where we remove one block and lineup a new block for download + if cnt == 0 { + cnt = 1 + } + } + + for i := uint32(0); i < cnt; i++ { + // Revalidate this node does not exists in the block map + _, found := handle.GetValue(fmt.Sprintf("%v", index)) + if !found { + // Block not found so lets push it for download + err := bc.refreshBlock(handle, index, prefetch || i > 0) + if err != nil { + return err + } + index++ + } + } + + return nil +} + +// refreshBlock: Get a block from the list and prepare it for download +func (bc *BlockCache) refreshBlock(handle *handlemap.Handle, index uint64, prefetch bool) error { + log.Debug("BlockCache::refreshBlock : Request to download %v=>%s (index %v, prefetch %v)", handle.ID, handle.Path, index, prefetch) + + // Convert index to offset + offset := index * bc.blockSize + if int64(offset) >= handle.Size { + // We have reached EOF so return back no need to download anything here + return io.EOF + } + + nodeList := handle.Buffers.Cooked + if nodeList.Len() == 0 && !prefetch { + // User needs a block now but there is no free block available right now + // this might happen when all blocks are under download and no first reader is hit for any of them + block := bc.blockPool.MustGet() + if block == nil { + log.Err("BlockCache::refreshBlock : Unable to allocate block %v=>%s (index %v, prefetch %v)", handle.ID, handle.Path, index, prefetch) + 
return fmt.Errorf("unable to allocate block") + } + + block.node = handle.Buffers.Cooked.PushFront(block) + } + + node := nodeList.Front() + if node != nil { + // Now there is at least one free block available in the list + block := node.Value.(*Block) + + if block.id != -1 { + // This is a reuse of a block case so we need to remove old entry from the map + handle.RemoveValue(fmt.Sprintf("%v", block.id)) + } + + // Reuse this block and lineup for download + block.ReUse() + block.id = int64(index) + block.offset = offset + + // Add this entry to handle map so that others can refer to the same block if required + handle.SetValue(fmt.Sprintf("%v", index), block) + handle.SetValue("#", (index + 1)) + + bc.lineupDownload(handle, block, prefetch) + } + + return nil +} + +// lineupDownload : Create a work item and schedule the download +func (bc *BlockCache) lineupDownload(handle *handlemap.Handle, block *Block, prefetch bool) { + item := &workItem{ + handle: handle, + block: block, + prefetch: prefetch, + failCnt: 0, + } + + // Remove this block from free block list and add to in-process list + if block.node != nil { + _ = handle.Buffers.Cooked.Remove(block.node) + } + + block.node = handle.Buffers.Cooking.PushFront(block) + + // Send the work item to worker pool to schedule download + bc.threadPool.Schedule(!prefetch, item) +} + +// download : Method to download the given amount of data +func (bc *BlockCache) download(item *workItem) { + fileName := fmt.Sprintf("%s::%v", item.handle.Path, item.block.id) + + // filename_blockindex is the key for the lock + // this ensure that at a given time a block from a file is downloaded only once across all open handles + flock := bc.fileLocks.Get(fileName) + flock.Lock() + defer flock.Unlock() + + var diskNode any + found := false + localPath := "" + + if bc.tmpPath != "" { + // Update diskpolicy to reflect the new file + diskNode, found = bc.fileNodeMap.Load(fileName) + if !found { + diskNode = bc.diskPolicy.Add(fileName) + bc.fileNodeMap.Store(fileName, diskNode) + } else { + bc.diskPolicy.Refresh(diskNode.(*list.Element)) + } + + // Check local file exists for this offset and file combination or not + localPath = filepath.Join(bc.tmpPath, fileName) + _, err := os.Stat(localPath) + + if err == nil { + // If file exists then read the block from the local file + f, err := os.Open(localPath) + if err != nil { + // On any disk failure we do not fail the download flow + log.Err("BlockCache::download : Failed to open file %s [%s]", fileName, err.Error()) + _ = os.Remove(localPath) + } else { + _, err = f.Read(item.block.data) + if err != nil { + log.Err("BlockCache::download : Failed to read data from disk cache %s [%s]", fileName, err.Error()) + f.Close() + _ = os.Remove(localPath) + } + + f.Close() + // We have read the data from disk so there is no need to go over network + // Just mark the block that download is complete + item.block.ReadyForReading() + return + } + } + } + + // If file does not exists then download the block from the container + n, err := bc.NextComponent().ReadInBuffer(internal.ReadInBufferOptions{ + Handle: item.handle, + Offset: int64(item.block.offset), + Data: item.block.data, + }) + + if item.failCnt > MAX_FAIL_CNT { + // If we failed to read the data 3 times then just give up + log.Err("BlockCache::download : 3 attempts to download a block have failed %v=>%s (index %v, offset %v)", item.handle.ID, item.handle.Path, item.block.id, item.block.offset) + return + } + + if err != nil { + // Fail to read the data so just reschedule this 
request + log.Err("BlockCache::download : Failed to read %v=>%s from offset %v [%s]", item.handle.ID, item.handle.Path, item.block.id, err.Error()) + item.failCnt++ + bc.threadPool.Schedule(false, item) + return + } else if n == 0 { + // No data read so just reschedule this request + log.Err("BlockCache::download : Failed to read %v=>%s from offset %v [0 bytes read]", item.handle.ID, item.handle.Path, item.block.id) + item.failCnt++ + bc.threadPool.Schedule(false, item) + return + } + + if bc.tmpPath != "" { + // Dump this block to local disk cache + f, err := os.Create(localPath) + if err == nil { + _, err := f.Write(item.block.data) + if err != nil { + log.Err("BlockCache::download : Failed to write %s to disk [%v]", localPath, err.Error()) + _ = os.Remove(localPath) + } + + f.Close() + bc.diskPolicy.Refresh(diskNode.(*list.Element)) + } + } + + // Just mark the block that download is complete + item.block.ReadyForReading() +} + +// diskEvict : Callback when a node from disk expires +func (bc *BlockCache) diskEvict(node *list.Element) { + fileName := node.Value.(string) + + // Lock the file name so that its not downloaded when deletion is going on + flock := bc.fileLocks.Get(fileName) + flock.Lock() + defer flock.Unlock() + + bc.fileNodeMap.Delete(fileName) + + localPath := filepath.Join(bc.tmpPath, fileName) + _ = os.Remove(localPath) +} + +// checkDiskUsage : Callback to check usage of disk and decide whether eviction is needed +func (bc *BlockCache) checkDiskUsage() bool { + data, _ := common.GetUsage(bc.tmpPath) + usage := uint32((data * 100) / float64(bc.diskSize)) + + if bc.maxDiskUsageHit { + if usage >= MIN_POOL_USAGE { + return true + } + bc.maxDiskUsageHit = false + } else { + if usage >= MAX_POOL_USAGE { + bc.maxDiskUsageHit = true + return true + } + } + + log.Info("BlockCache::checkDiskUsage : current disk usage : %fMB %v%%", data, usage) + log.Info("BlockCache::checkDiskUsage : current cache usage : %v%%", bc.blockPool.Usage()) + return false +} + +// ------------------------- Factory ------------------------------------------- +// Pipeline will call this method to create your object, initialize your variables here +// << DO NOT DELETE ANY AUTO GENERATED CODE HERE >> +func NewBlockCacheComponent() internal.Component { + comp := &BlockCache{ + fileLocks: common.NewLockMap(), + } + comp.SetName(compName) + return comp +} + +// On init register this component to pipeline and supply your constructor +func init() { + internal.AddComponent(compName, NewBlockCacheComponent) + + blockSizeMb := config.AddUint64Flag("block-cache-block-size", 0, "Size (in MB) of a block to be downloaded for block-cache.") + config.BindPFlag(compName+".block-size-mb", blockSizeMb) + + blockPoolMb := config.AddUint64Flag("block-cache-pool-size", 0, "Size (in MB) of total memory preallocated for block-cache.") + config.BindPFlag(compName+".mem-size-mb", blockPoolMb) + + blockCachePath := config.AddStringFlag("block-cache-path", "", "Path to store downloaded blocks.") + config.BindPFlag(compName+".path", blockCachePath) + + blockDiskMb := config.AddUint64Flag("block-cache-disk-size", 0, "Size (in MB) of total disk capacity that block-cache can use.") + config.BindPFlag(compName+".disk-size-mb", blockDiskMb) + + blockCachePrefetch := config.AddUint32Flag("block-cache-prefetch", 0, "Max number of blocks to prefetch.") + config.BindPFlag(compName+".prefetch", blockCachePrefetch) + + blockCachePrefetchOnOpen := config.AddBoolFlag("block-cache-prefetch-on-open", false, "Start prefetching on open or wait for 
first read.") + config.BindPFlag(compName+".prefetch-on-open", blockCachePrefetchOnOpen) + +} diff --git a/component/block_cache/block_cache_linux_test.go b/component/block_cache/block_cache_linux_test.go new file mode 100644 index 000000000..dcf21ce47 --- /dev/null +++ b/component/block_cache/block_cache_linux_test.go @@ -0,0 +1,498 @@ +//go:build linux + +/* + _____ _____ _____ ____ ______ _____ ------ + | | | | | | | | | | | | | + | | | | | | | | | | | | | + | --- | | | | |-----| |---- | | |-----| |----- ------ + | | | | | | | | | | | | | + | ____| |_____ | ____| | ____| | |_____| _____| |_____ |_____ + + + Licensed under the MIT License . + + Copyright © 2020-2023 Microsoft Corporation. All rights reserved. + Author : + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE +*/ + +package block_cache + +import ( + "context" + "fmt" + "io/ioutil" + "math/rand" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "cloudfuse/common" + "cloudfuse/common/config" + "cloudfuse/common/log" + "cloudfuse/component/loopback" + "cloudfuse/internal" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +var home_dir, _ = os.UserHomeDir() + +type blockCacheTestSuite struct { + suite.Suite + assert *assert.Assertions +} + +func (suite *blockCacheTestSuite) SetupTest() { + suite.assert = assert.New(suite.T()) +} + +type testObj struct { + fake_storage_path string + disk_cache_path string + loopback internal.Component + blockCache *BlockCache +} + +func randomString(length int) string { + rand.Seed(time.Now().UnixNano()) + b := make([]byte, length) + rand.Read(b) + return fmt.Sprintf("%x", b)[:length] +} + +func getFakeStoragePath(base string) string { + tmp_path := filepath.Join(home_dir, base+randomString(8)) + _ = os.Mkdir(tmp_path, 0777) + return tmp_path +} + +func setupPipeline(cfg string) (*testObj, error) { + tobj := &testObj{ + fake_storage_path: getFakeStoragePath("block_cache"), + disk_cache_path: getFakeStoragePath("fake_storage"), + } + + if cfg == "" { + cfg = fmt.Sprintf("read-only: true\n\nloopbackfs:\n path: %s\n\nblock_cache:\n block-size-mb: 1\n mem-size-mb: 20\n prefetch: 12\n parallelism: 10\n path: %s\n disk-size-mb: 50\n disk-timeout-sec: 20", tobj.fake_storage_path, tobj.disk_cache_path) + } else { + cfg = fmt.Sprintf("%s\n\nloopbackfs:\n path: %s\n", cfg, tobj.fake_storage_path) + } + + config.ReadConfigFromReader(strings.NewReader(cfg)) + + tobj.loopback = loopback.NewLoopbackFSComponent() + err := tobj.loopback.Configure(true) + if err != nil { + return nil, 
fmt.Errorf("Unable to configure loopback [%s]", err.Error()) + } + + tobj.blockCache = NewBlockCacheComponent().(*BlockCache) + tobj.blockCache.SetNextComponent(tobj.loopback) + err = tobj.blockCache.Configure(true) + if err != nil { + return nil, fmt.Errorf("Unable to configure blockcache [%s]", err.Error()) + } + + err = tobj.loopback.Start(context.Background()) + if err != nil { + return nil, fmt.Errorf("Unable to start loopback [%s]", err.Error()) + } + + err = tobj.blockCache.Start(context.Background()) + if err != nil { + return nil, fmt.Errorf("Unable to start blockcache [%s]", err.Error()) + } + + return tobj, nil +} + +func (tobj *testObj) cleanupPipeline() error { + if tobj == nil { + return nil + } + + if tobj.loopback != nil { + err := tobj.loopback.Stop() + if err != nil { + return fmt.Errorf("Unable to stop loopback [%s]", err.Error()) + } + } + + if tobj.blockCache != nil { + err := tobj.blockCache.Stop() + if err != nil { + return fmt.Errorf("Unable to stop block cache [%s]", err.Error()) + } + } + + os.RemoveAll(tobj.fake_storage_path) + os.RemoveAll(tobj.disk_cache_path) + + return nil +} + +// Tests the default configuration of block cache +func (suite *blockCacheTestSuite) TestEmpty() { + emptyConfig := "read-only: true" + tobj, err := setupPipeline(emptyConfig) + defer tobj.cleanupPipeline() + + suite.assert.Nil(err) + suite.assert.Equal(tobj.blockCache.Name(), "block_cache") + suite.assert.EqualValues(tobj.blockCache.blockSize, 16*_1MB) + suite.assert.EqualValues(tobj.blockCache.memSize, 4192*_1MB) + suite.assert.EqualValues(tobj.blockCache.diskSize, 4192) + suite.assert.EqualValues(tobj.blockCache.diskTimeout, defaultTimeout) + suite.assert.EqualValues(tobj.blockCache.workers, 128) + suite.assert.EqualValues(tobj.blockCache.prefetch, MIN_PREFETCH) + suite.assert.EqualValues(tobj.blockCache.noPrefetch, false) + suite.assert.NotNil(tobj.blockCache.blockPool) + suite.assert.NotNil(tobj.blockCache.threadPool) +} + +func (suite *blockCacheTestSuite) TestNonROMount() { + emptyConfig := "read-only: false" + tobj, err := setupPipeline(emptyConfig) + + suite.assert.NotNil(err) + suite.assert.Nil(tobj) + suite.assert.Contains(err.Error(), "filesystem is not mounted in read-only mode") +} + +func (suite *blockCacheTestSuite) TestInvalidPrefetchCount() { + cfg := "read-only: true\n\nblock_cache:\n block-size-mb: 16\n mem-size-mb: 500\n prefetch: 8\n parallelism: 10\n path: abcd\n disk-size-mb: 100\n disk-timeout-sec: 5" + tobj, err := setupPipeline(cfg) + defer tobj.cleanupPipeline() + + suite.assert.NotNil(err) + suite.assert.Contains(err.Error(), "invalid config for prefetch count") +} + +func (suite *blockCacheTestSuite) TestNoPrefetchConfig() { + cfg := "read-only: true\n\nblock_cache:\n block-size-mb: 1\n mem-size-mb: 500\n prefetch: 0\n parallelism: 10\n path: abcd\n disk-size-mb: 100\n disk-timeout-sec: 5" + tobj, err := setupPipeline(cfg) + defer tobj.cleanupPipeline() + + suite.assert.Nil(err) + suite.assert.NotNil(tobj.blockCache) + suite.assert.Equal(tobj.blockCache.noPrefetch, true) +} + +func (suite *blockCacheTestSuite) TestInvalidDiskPath() { + cfg := "read-only: true\n\nblock_cache:\n block-size-mb: 16\n mem-size-mb: 500\n prefetch: 12\n parallelism: 10\n path: /abcd\n disk-size-mb: 100\n disk-timeout-sec: 5" + tobj, err := setupPipeline(cfg) + defer tobj.cleanupPipeline() + + suite.assert.NotNil(err) + suite.assert.Contains(err.Error(), "permission denied") +} + +func (suite *blockCacheTestSuite) TestManualConfig() { + cfg := "read-only: true\n\nblock_cache:\n 
block-size-mb: 16\n mem-size-mb: 500\n prefetch: 12\n parallelism: 10\n path: abcd\n disk-size-mb: 100\n disk-timeout-sec: 5" + tobj, err := setupPipeline(cfg) + defer tobj.cleanupPipeline() + + suite.assert.Nil(err) + suite.assert.Equal(tobj.blockCache.Name(), "block_cache") + suite.assert.EqualValues(tobj.blockCache.blockSize, 16*_1MB) + suite.assert.EqualValues(tobj.blockCache.memSize, 500*_1MB) + suite.assert.EqualValues(tobj.blockCache.workers, 10) + suite.assert.EqualValues(tobj.blockCache.diskSize, 100) + suite.assert.EqualValues(tobj.blockCache.diskTimeout, 5) + suite.assert.EqualValues(tobj.blockCache.prefetch, 12) + suite.assert.EqualValues(tobj.blockCache.workers, 10) + + suite.assert.NotNil(tobj.blockCache.blockPool) +} + +func (suite *blockCacheTestSuite) TestOpenFileFail() { + tobj, err := setupPipeline("") + defer tobj.cleanupPipeline() + + suite.assert.Nil(err) + suite.assert.NotNil(tobj.blockCache) + + path := "a" + options := internal.OpenFileOptions{Name: path} + h, err := tobj.blockCache.OpenFile(options) + suite.assert.NotNil(err) + suite.assert.Nil(h) + suite.assert.Contains(err.Error(), "no such file or directory") +} + +func (suite *blockCacheTestSuite) TestFileOpneClose() { + tobj, err := setupPipeline("") + defer tobj.cleanupPipeline() + + suite.assert.Nil(err) + suite.assert.NotNil(tobj.blockCache) + + fileName := "bc.tst" + stroagePath := filepath.Join(tobj.fake_storage_path, fileName) + data := make([]byte, 5*_1MB) + _, _ = rand.Read(data) + ioutil.WriteFile(stroagePath, data, 0777) + + options := internal.OpenFileOptions{Name: fileName} + h, err := tobj.blockCache.OpenFile(options) + suite.assert.Nil(err) + suite.assert.NotNil(h) + suite.assert.Equal(h.Size, int64(5*_1MB)) + suite.assert.NotNil(h.Buffers.Cooked) + suite.assert.NotNil(h.Buffers.Cooking) + + tobj.blockCache.CloseFile(internal.CloseFileOptions{Handle: h}) + suite.assert.Nil(h.Buffers.Cooked) + suite.assert.Nil(h.Buffers.Cooking) +} + +func (suite *blockCacheTestSuite) TestFileRead() { + tobj, err := setupPipeline("") + defer tobj.cleanupPipeline() + + suite.assert.Nil(err) + suite.assert.NotNil(tobj.blockCache) + + fileName := "bc.tst" + stroagePath := filepath.Join(tobj.fake_storage_path, fileName) + data := make([]byte, 50*_1MB) + _, _ = rand.Read(data) + ioutil.WriteFile(stroagePath, data, 0777) + + options := internal.OpenFileOptions{Name: fileName} + h, err := tobj.blockCache.OpenFile(options) + suite.assert.Nil(err) + suite.assert.NotNil(h) + suite.assert.Equal(h.Size, int64(50*_1MB)) + suite.assert.NotNil(h.Buffers.Cooked) + suite.assert.NotNil(h.Buffers.Cooking) + + data = make([]byte, 1000) + + // Read beyond end of file + n, err := tobj.blockCache.ReadInBuffer(internal.ReadInBufferOptions{Handle: h, Offset: int64((50 * _1MB) + 1), Data: data}) + suite.assert.NotNil(err) + suite.assert.Equal(n, 0) + suite.assert.Contains(err.Error(), "EOF") + + // Read exactly at last offset + n, err = tobj.blockCache.ReadInBuffer(internal.ReadInBufferOptions{Handle: h, Offset: int64(50 * _1MB), Data: data}) + suite.assert.NotNil(err) + suite.assert.Equal(n, 0) + suite.assert.Contains(err.Error(), "EOF") + + n, err = tobj.blockCache.ReadInBuffer(internal.ReadInBufferOptions{Handle: h, Offset: 0, Data: data}) + suite.assert.Nil(err) + suite.assert.Equal(n, 1000) + + cnt := h.Buffers.Cooked.Len() + h.Buffers.Cooking.Len() + suite.assert.Equal(cnt, MIN_PREFETCH*2) + + tobj.blockCache.CloseFile(internal.CloseFileOptions{Handle: h}) + suite.assert.Nil(h.Buffers.Cooked) + suite.assert.Nil(h.Buffers.Cooking) +} 
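+
+// Note: the tests above exercise the read path end to end. A minimal usage
+// sketch of that path (assuming a pipeline where block_cache sits on top of
+// loopbackfs, as built by setupPipeline) looks like:
+//
+//	h, _ := tobj.blockCache.OpenFile(internal.OpenFileOptions{Name: "bc.tst"})
+//	buf := make([]byte, 1000)
+//	n, err := tobj.blockCache.ReadInBuffer(internal.ReadInBufferOptions{Handle: h, Offset: 0, Data: buf})
+//	// ReadInBuffer resolves the block index (Offset / blockSize), waits for the
+//	// block download to complete, then copies from the block's data into buf.
+//	_ = tobj.blockCache.CloseFile(internal.CloseFileOptions{Handle: h})
+//	_, _ = n, err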
+ +func (suite *blockCacheTestSuite) TestFileReadSerial() { + tobj, err := setupPipeline("") + defer tobj.cleanupPipeline() + + suite.assert.Nil(err) + suite.assert.NotNil(tobj.blockCache) + + fileName := "bc.tst" + stroagePath := filepath.Join(tobj.fake_storage_path, fileName) + data := make([]byte, 50*_1MB) + _, _ = rand.Read(data) + ioutil.WriteFile(stroagePath, data, 0777) + + options := internal.OpenFileOptions{Name: fileName} + h, err := tobj.blockCache.OpenFile(options) + suite.assert.Nil(err) + suite.assert.NotNil(h) + suite.assert.Equal(h.Size, int64(50*_1MB)) + suite.assert.NotNil(h.Buffers.Cooked) + suite.assert.NotNil(h.Buffers.Cooking) + + data = make([]byte, 1000) + + totaldata := uint64(0) + for { + n, err := tobj.blockCache.ReadInBuffer(internal.ReadInBufferOptions{Handle: h, Offset: int64(totaldata), Data: data}) + totaldata += uint64(n) + if err != nil { + break + } + suite.assert.LessOrEqual(n, 1000) + } + + suite.assert.Equal(totaldata, uint64(50*_1MB)) + cnt := h.Buffers.Cooked.Len() + h.Buffers.Cooking.Len() + suite.assert.Equal(cnt, 12) + + tobj.blockCache.CloseFile(internal.CloseFileOptions{Handle: h}) + suite.assert.Nil(h.Buffers.Cooked) + suite.assert.Nil(h.Buffers.Cooking) +} + +func (suite *blockCacheTestSuite) TestFileReadRandom() { + tobj, err := setupPipeline("") + defer tobj.cleanupPipeline() + + suite.assert.Nil(err) + suite.assert.NotNil(tobj.blockCache) + + fileName := "bc.tst" + stroagePath := filepath.Join(tobj.fake_storage_path, fileName) + data := make([]byte, 100*_1MB) + _, _ = rand.Read(data) + ioutil.WriteFile(stroagePath, data, 0777) + + options := internal.OpenFileOptions{Name: fileName} + h, err := tobj.blockCache.OpenFile(options) + suite.assert.Nil(err) + suite.assert.NotNil(h) + suite.assert.Equal(h.Size, int64(100*_1MB)) + suite.assert.NotNil(h.Buffers.Cooked) + suite.assert.NotNil(h.Buffers.Cooking) + + data = make([]byte, 100) + max := int64(100 * _1MB) + for i := 0; i < 50; i++ { + offset := rand.Int63n(max) + n, _ := tobj.blockCache.ReadInBuffer(internal.ReadInBufferOptions{Handle: h, Offset: offset, Data: data}) + suite.assert.LessOrEqual(n, 100) + } + + cnt := h.Buffers.Cooked.Len() + h.Buffers.Cooking.Len() + suite.assert.LessOrEqual(cnt, 8) + + tobj.blockCache.CloseFile(internal.CloseFileOptions{Handle: h}) + suite.assert.Nil(h.Buffers.Cooked) + suite.assert.Nil(h.Buffers.Cooking) +} + +func (suite *blockCacheTestSuite) TestFileReadRandomNoPrefetch() { + tobj, err := setupPipeline("") + defer tobj.cleanupPipeline() + + suite.assert.Nil(err) + suite.assert.NotNil(tobj.blockCache) + + // Set the no prefetch mode here + tobj.blockCache.noPrefetch = true + tobj.blockCache.prefetch = 0 + + fileName := "bc.tst" + stroagePath := filepath.Join(tobj.fake_storage_path, fileName) + data := make([]byte, 100*_1MB) + _, _ = rand.Read(data) + ioutil.WriteFile(stroagePath, data, 0777) + + options := internal.OpenFileOptions{Name: fileName} + h, err := tobj.blockCache.OpenFile(options) + suite.assert.Nil(err) + suite.assert.NotNil(h) + suite.assert.Equal(h.Size, int64(100*_1MB)) + suite.assert.NotNil(h.Buffers.Cooked) + suite.assert.NotNil(h.Buffers.Cooking) + + data = make([]byte, 100) + max := int64(100 * _1MB) + for i := 0; i < 50; i++ { + offset := rand.Int63n(max) + n, _ := tobj.blockCache.ReadInBuffer(internal.ReadInBufferOptions{Handle: h, Offset: offset, Data: data}) + suite.assert.Equal(h.Buffers.Cooked.Len(), 1) + suite.assert.Equal(h.Buffers.Cooking.Len(), 0) + suite.assert.LessOrEqual(n, 100) + } + + cnt := h.Buffers.Cooked.Len() + 
h.Buffers.Cooking.Len() + suite.assert.Equal(cnt, 1) + + tobj.blockCache.CloseFile(internal.CloseFileOptions{Handle: h}) + suite.assert.Nil(h.Buffers.Cooked) + suite.assert.Nil(h.Buffers.Cooking) +} + +func (suite *blockCacheTestSuite) TestDiskUsageCheck() { + tobj, err := setupPipeline("") + defer tobj.cleanupPipeline() + + suite.assert.Nil(err) + suite.assert.NotNil(tobj.blockCache) + + usage, err := common.GetUsage(tobj.disk_cache_path) + suite.assert.Nil(err) + suite.assert.Less(usage, float64(1.0)) + suite.assert.Equal(tobj.blockCache.checkDiskUsage(), false) + + // Default disk size is 50MB + data := make([]byte, 5*_1MB) + _, _ = rand.Read(data) + + type diskusagedata struct { + name string + diskflag bool + } + + localfiles := make([]diskusagedata, 0) + for i := 0; i < 13; i++ { + fname := randomString(5) + diskFile := filepath.Join(tobj.disk_cache_path, fname) + localfiles = append(localfiles, diskusagedata{name: diskFile, diskflag: i >= 7}) + } + + for i := 0; i < 13; i++ { + ioutil.WriteFile(localfiles[i].name, data, 0777) + usage, err := common.GetUsage(tobj.disk_cache_path) + suite.assert.Nil(err) + fmt.Printf("%d : %v (%v : %v) Usage %v\n", i, localfiles[i].name, localfiles[i].diskflag, tobj.blockCache.checkDiskUsage(), usage) + suite.assert.Equal(tobj.blockCache.checkDiskUsage(), localfiles[i].diskflag) + } + + for i := 0; i < 13; i++ { + localfiles[i].diskflag = i < 8 + } + + for i := 0; i < 13; i++ { + os.Remove(localfiles[i].name) + usage, err := common.GetUsage(tobj.disk_cache_path) + suite.assert.Nil(err) + fmt.Printf("%d : %v (%v : %v) Usage %v\n", i, localfiles[i].name, localfiles[i].diskflag, tobj.blockCache.checkDiskUsage(), usage) + suite.assert.Equal(tobj.blockCache.checkDiskUsage(), localfiles[i].diskflag) + } +} + +// In order for 'go test' to run this suite, we need to create +// a normal test function and pass our suite to suite.Run +func TestBlockCacheTestSuite(t *testing.T) { + bcsuite := new(blockCacheTestSuite) + err := log.SetDefaultLogger("silent", common.LogConfig{Level: common.ELogLevel.LOG_DEBUG()}) + if err != nil { + panic("Unable to set silent logger as default.") + } + + suite.Run(t, bcsuite) +} diff --git a/component/block_cache/block_linux.go b/component/block_cache/block_linux.go new file mode 100644 index 000000000..9019a10d0 --- /dev/null +++ b/component/block_cache/block_linux.go @@ -0,0 +1,113 @@ +//go:build linux + +/* + _____ _____ _____ ____ ______ _____ ------ + | | | | | | | | | | | | | + | | | | | | | | | | | | | + | --- | | | | |-----| |---- | | |-----| |----- ------ + | | | | | | | | | | | | | + | ____| |_____ | ____| | ____| | |_____| _____| |_____ |_____ + + + Licensed under the MIT License . + + Copyright © 2020-2023 Microsoft Corporation. All rights reserved. + Author : + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE +*/ + +package block_cache + +import ( + "container/list" + "fmt" + "syscall" +) + +// Block is a memory mapped buffer with its state to hold data +type Block struct { + offset uint64 // Start offset of the data this block holds + id int64 // Id of the block i.e. (offset / block size) + state chan int // Channel depicting data has been read for this block or not + data []byte // Data read from blob + node *list.Element // node representation of this block in the list inside handle +} + +// AllocateBlock creates a new memory mapped buffer for the given size +func AllocateBlock(size uint64) (*Block, error) { + if size == 0 { + return nil, fmt.Errorf("invalid size") + } + + prot, flags := syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_ANON|syscall.MAP_PRIVATE + addr, err := syscall.Mmap(-1, 0, int(size), prot, flags) + + if err != nil { + return nil, fmt.Errorf("mmap error: %v", err) + } + + return &Block{ + data: addr, + state: nil, + id: -1, + node: nil, + }, nil + + // we do not create channel here, as that will be created when buffer is retrieved + // reinit will always be called before use and that will create the channel as well. +} + +// Delete cleans up the memory mapped buffer +func (b *Block) Delete() error { + if b.data == nil { + return fmt.Errorf("invalid buffer") + } + + err := syscall.Munmap(b.data) + b.data = nil + if err != nil { + // if we get here, there is likely memory corruption. + return fmt.Errorf("munmap error: %v", err) + } + + return nil +} + +// ReUse reinits the Block by recreating its channel +func (b *Block) ReUse() { + b.id = -1 + b.offset = 0 + b.state = make(chan int, 1) +} + +// ReadyForReading marks this Block is now ready for reading by its first reader (data download completed) +func (b *Block) ReadyForReading() { + select { + case b.state <- 1: + break + default: + break + } +} + +// Unblock marks this Block is ready to be read in parllel now +func (b *Block) Unblock() { + close(b.state) +} diff --git a/component/block_cache/block_linux_test.go b/component/block_cache/block_linux_test.go new file mode 100644 index 000000000..0e27c08f7 --- /dev/null +++ b/component/block_cache/block_linux_test.go @@ -0,0 +1,188 @@ +//go:build linux && !authtest +// +build linux,!authtest + +/* + _____ _____ _____ ____ ______ _____ ------ + | | | | | | | | | | | | | + | | | | | | | | | | | | | + | --- | | | | |-----| |---- | | |-----| |----- ------ + | | | | | | | | | | | | | + | ____| |_____ | ____| | ____| | |_____| _____| |_____ |_____ + + + Licensed under the MIT License . + + Copyright © 2020-2023 Microsoft Corporation. All rights reserved. 
+ Author : + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE +*/ + +package block_cache + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +type blockTestSuite struct { + suite.Suite + assert *assert.Assertions +} + +func (suite *blockTestSuite) SetupTest() { +} + +func (suite *blockTestSuite) cleanupTest() { +} + +func (suite *blockTestSuite) TestAllocate() { + suite.assert = assert.New(suite.T()) + + b, err := AllocateBlock(0) + suite.assert.Nil(b) + suite.assert.NotNil(err) + suite.assert.Contains(err.Error(), "invalid size") + + b, err = AllocateBlock(10) + suite.assert.NotNil(b) + suite.assert.Nil(err) + suite.assert.NotNil(b.data) + + _ = b.Delete() +} + +func (suite *blockTestSuite) TestAllocateBig() { + suite.assert = assert.New(suite.T()) + + b, err := AllocateBlock(100 * 1024 * 1024) + suite.assert.NotNil(b) + suite.assert.Nil(err) + suite.assert.NotNil(b.data) + suite.assert.Equal(cap(b.data), 100*1024*1024) + + b.Delete() +} + +func (suite *blockTestSuite) TestAllocateHuge() { + suite.assert = assert.New(suite.T()) + + b, err := AllocateBlock(50 * 1024 * 1024 * 1024) + suite.assert.Nil(b) + suite.assert.NotNil(err) + suite.assert.Contains(err.Error(), "mmap error") +} + +func (suite *blockTestSuite) TestFreeNilData() { + suite.assert = assert.New(suite.T()) + + b, err := AllocateBlock(1) + suite.assert.NotNil(b) + suite.assert.Nil(err) + b.data = nil + + err = b.Delete() + suite.assert.NotNil(err) + suite.assert.Contains(err.Error(), "invalid buffer") +} + +func (suite *blockTestSuite) TestFreeInvalidData() { + suite.assert = assert.New(suite.T()) + + b, err := AllocateBlock(1) + suite.assert.NotNil(b) + suite.assert.Nil(err) + b.data = make([]byte, 1) + + err = b.Delete() + suite.assert.NotNil(err) + suite.assert.Contains(err.Error(), "invalid argument") +} + +func (suite *blockTestSuite) TestResuse() { + suite.assert = assert.New(suite.T()) + + b, err := AllocateBlock(1) + suite.assert.NotNil(b) + suite.assert.Nil(err) + suite.assert.Nil(b.state) + + b.ReUse() + suite.assert.NotNil(b.state) + suite.assert.Nil(b.node) + + _ = b.Delete() +} + +func (suite *blockTestSuite) TestReadyForReading() { + suite.assert = assert.New(suite.T()) + + b, err := AllocateBlock(1) + suite.assert.NotNil(b) + suite.assert.Nil(err) + suite.assert.Nil(b.state) + + b.ReUse() + suite.assert.NotNil(b.state) + + b.ReadyForReading() + suite.assert.Equal(len(b.state), 1) + + <-b.state + suite.assert.Equal(len(b.state), 0) + + b.ReUse() + 
suite.assert.NotNil(b.state) + + _ = b.Delete() +} + +func (suite *blockTestSuite) TestUnBlock() { + suite.assert = assert.New(suite.T()) + + b, err := AllocateBlock(1) + suite.assert.NotNil(b) + suite.assert.Nil(err) + suite.assert.Nil(b.state) + + b.ReUse() + suite.assert.NotNil(b.state) + suite.assert.Nil(b.node) + + b.ReadyForReading() + suite.assert.Equal(len(b.state), 1) + + <-b.state + suite.assert.Equal(len(b.state), 0) + + b.Unblock() + suite.assert.NotNil(b.state) + suite.assert.Equal(len(b.state), 0) + + <-b.state + suite.assert.Equal(len(b.state), 0) + + _ = b.Delete() +} + +func TestBlockSuite(t *testing.T) { + suite.Run(t, new(blockTestSuite)) +} diff --git a/component/block_cache/blockpool_linux.go b/component/block_cache/blockpool_linux.go new file mode 100644 index 000000000..a0d70d9b5 --- /dev/null +++ b/component/block_cache/blockpool_linux.go @@ -0,0 +1,152 @@ +//go:build linux + +/* + _____ _____ _____ ____ ______ _____ ------ + | | | | | | | | | | | | | + | | | | | | | | | | | | | + | --- | | | | |-----| |---- | | |-----| |----- ------ + | | | | | | | | | | | | | + | ____| |_____ | ____| | ____| | |_____| _____| |_____ |_____ + + + Licensed under the MIT License . + + Copyright © 2020-2023 Microsoft Corporation. All rights reserved. + Author : + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE +*/ + +package block_cache + +import "cloudfuse/common/log" + +const _1MB uint64 = (1024 * 1024) + +// BlockPool is a pool of Blocks +type BlockPool struct { + // Channel holding free blocks + blocksCh chan *Block + + // Size of each block this pool holds + blockSize uint64 + + // Number of block that this pool can handle at max + maxBlocks uint32 +} + +// NewBlockPool allocates a new pool of blocks +func NewBlockPool(blockSize uint64, memSize uint64) *BlockPool { + // Ignore if config is invalid + if blockSize == 0 || memSize < blockSize { + log.Err("blockpool::NewBlockPool : blockSize : %v, memsize: %v", blockSize, memSize) + return nil + } + + // Calculate how many blocks can be allocated + blockCount := uint32(memSize / blockSize) + + pool := &BlockPool{ + blocksCh: make(chan *Block, blockCount), + maxBlocks: uint32(blockCount), + blockSize: blockSize, + } + + // Preallocate all blocks so that during runtime we do not spend CPU cycles on this + for i := (uint32)(0); i < blockCount; i++ { + b, err := AllocateBlock(blockSize) + if err != nil { + return nil + } + + pool.blocksCh <- b + } + + return pool +} + +// Terminate ends the block pool life +func (pool *BlockPool) Terminate() { + close(pool.blocksCh) + + // Release back the memory allocated to each block + for { + b := <-pool.blocksCh + if b == nil { + break + } + _ = b.Delete() + } +} + +// Usage provides % usage of this block pool +func (pool *BlockPool) Usage() uint32 { + return ((pool.maxBlocks - (uint32)(len(pool.blocksCh))) * 100) / pool.maxBlocks +} + +// MustGet a Block from the pool, wait until something is free +func (pool *BlockPool) MustGet() *Block { + var b *Block = nil + + select { + case b = <-pool.blocksCh: + break + + default: + // There are no free blocks so we must allocate one and return here + // As the consumer of the pool needs a block immediately + log.Info("BlockPool::MustGet : No free blocks, allocating a new one") + var err error + b, err = AllocateBlock(pool.blockSize) + if err != nil { + return nil + } + } + + // Mark the buffer ready for reuse now + b.ReUse() + return b +} + +// TryGet a Block from the pool, return back if nothing is available +func (pool *BlockPool) TryGet() *Block { + var b *Block = nil + + select { + case b = <-pool.blocksCh: + break + + default: + return nil + } + + // Mark the buffer ready for reuse now + b.ReUse() + return b +} + +// Release back the Block to the pool +func (pool *BlockPool) Release(b *Block) { + select { + case pool.blocksCh <- b: + break + default: + _ = b.Delete() + } +} diff --git a/component/block_cache/blockpool_linux_test.go b/component/block_cache/blockpool_linux_test.go new file mode 100644 index 000000000..086fc5c81 --- /dev/null +++ b/component/block_cache/blockpool_linux_test.go @@ -0,0 +1,166 @@ +//go:build linux && !authtest +// +build linux,!authtest + +/* + _____ _____ _____ ____ ______ _____ ------ + | | | | | | | | | | | | | + | | | | | | | | | | | | | + | --- | | | | |-----| |---- | | |-----| |----- ------ + | | | | | | | | | | | | | + | ____| |_____ | ____| | ____| | |_____| _____| |_____ |_____ + + + Licensed under the MIT License . + + Copyright © 2020-2023 Microsoft Corporation. All rights reserved. 
+ Author : + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE +*/ + +package block_cache + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +type blockpoolTestSuite struct { + suite.Suite + assert *assert.Assertions +} + +func (suite *blockpoolTestSuite) SetupTest() { +} + +func (suite *blockpoolTestSuite) cleanupTest() { +} + +func (suite *blockpoolTestSuite) TestAllocate() { + suite.assert = assert.New(suite.T()) + + bp := NewBlockPool(0, 0) + suite.assert.Nil(bp) + + bp = NewBlockPool(1, 0) + suite.assert.Nil(bp) + + bp = NewBlockPool(1, 1) + suite.assert.NotNil(bp) + suite.assert.NotNil(bp.blocksCh) + + bp.Terminate() + suite.assert.Equal(len(bp.blocksCh), 0) +} + +func (suite *blockpoolTestSuite) TestGetRelease() { + suite.assert = assert.New(suite.T()) + + bp := NewBlockPool(1, 5) + suite.assert.NotNil(bp) + suite.assert.NotNil(bp.blocksCh) + suite.assert.Equal(len(bp.blocksCh), 5) + + b := bp.MustGet() + suite.assert.NotNil(b) + suite.assert.Equal(len(bp.blocksCh), 4) + + bp.Release(b) + suite.assert.Equal(len(bp.blocksCh), 5) + + b = bp.TryGet() + suite.assert.NotNil(b) + suite.assert.Equal(len(bp.blocksCh), 4) + + bp.Release(b) + suite.assert.Equal(len(bp.blocksCh), 5) + + bp.Terminate() + suite.assert.Equal(len(bp.blocksCh), 0) +} + +func (suite *blockpoolTestSuite) TestUsage() { + suite.assert = assert.New(suite.T()) + + bp := NewBlockPool(1, 5) + suite.assert.NotNil(bp) + suite.assert.NotNil(bp.blocksCh) + suite.assert.Equal(len(bp.blocksCh), 5) + + var blocks []*Block + b := bp.MustGet() + suite.assert.NotNil(b) + blocks = append(blocks, b) + + usage := bp.Usage() + suite.assert.Equal(usage, uint32(20)) + + b = bp.TryGet() + suite.assert.NotNil(b) + blocks = append(blocks, b) + + usage = bp.Usage() + suite.assert.Equal(usage, uint32(40)) + + for _, blk := range blocks { + bp.Release(blk) + } + + bp.Terminate() + suite.assert.Equal(len(bp.blocksCh), 0) +} + +func (suite *blockpoolTestSuite) TestBufferExhaution() { + suite.assert = assert.New(suite.T()) + + bp := NewBlockPool(1, 5) + suite.assert.NotNil(bp) + suite.assert.NotNil(bp.blocksCh) + suite.assert.Equal(len(bp.blocksCh), 5) + + var blocks []*Block + for i := 0; i < 5; i++ { + b := bp.MustGet() + suite.assert.NotNil(b) + blocks = append(blocks, b) + } + + usage := bp.Usage() + suite.assert.Equal(usage, uint32(100)) + + b := bp.TryGet() + suite.assert.Nil(b) + + b = bp.MustGet() + suite.assert.NotNil(b) + blocks = append(blocks, b) + + for _, blk := range blocks { + 
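+		// Hand every borrowed block back: Release re-queues a block while the pool
+		// channel has room and frees it otherwise, so Terminate can drain cleanly.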
bp.Release(blk) + } + + bp.Terminate() + suite.assert.Equal(len(bp.blocksCh), 0) +} + +func TestBlockPoolSuite(t *testing.T) { + suite.Run(t, new(blockpoolTestSuite)) +} diff --git a/component/block_cache/threadpool_linux.go b/component/block_cache/threadpool_linux.go new file mode 100644 index 000000000..6183cd62c --- /dev/null +++ b/component/block_cache/threadpool_linux.go @@ -0,0 +1,148 @@ +//go:build linux + +/* + _____ _____ _____ ____ ______ _____ ------ + | | | | | | | | | | | | | + | | | | | | | | | | | | | + | --- | | | | |-----| |---- | | |-----| |----- ------ + | | | | | | | | | | | | | + | ____| |_____ | ____| | ____| | |_____| _____| |_____ |_____ + + + Licensed under the MIT License . + + Copyright © 2020-2023 Microsoft Corporation. All rights reserved. + Author : + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE +*/ + +package block_cache + +import ( + "sync" + + "cloudfuse/internal/handlemap" +) + +// ThreadPool is a group of workers that can be used to execute a task +type ThreadPool struct { + // Number of workers running in this group + worker uint32 + + // Channel to close all the workers + close chan int + + // Wait group to wait for all workers to finish + wg sync.WaitGroup + + // Channel to hold pending requests + priorityCh chan *workItem + normalCh chan *workItem + + // Reader method that will actually read the data + reader func(*workItem) +} + +// One workitem to be scheduled +type workItem struct { + handle *handlemap.Handle // Handle to which this item belongs + block *Block // Block to hold data for this item + prefetch bool // Flag marking this is a prefetch request or not + failCnt int32 // How many times this item has failed to download +} + +// newThreadPool creates a new thread pool +func newThreadPool(count uint32, reader func(*workItem)) *ThreadPool { + if count == 0 || reader == nil { + return nil + } + + return &ThreadPool{ + worker: count, + reader: reader, + close: make(chan int, count), + priorityCh: make(chan *workItem, count*2), + normalCh: make(chan *workItem, count*5000), + } +} + +// Start all the workers and wait till they start receiving requests +func (t *ThreadPool) Start() { + // 10% threads will listne only on high priority channel + highPriority := (t.worker * 10) / 100 + + for i := uint32(0); i < t.worker; i++ { + t.wg.Add(1) + go t.Do(i < highPriority) + } +} + +// Stop all the workers threads +func (t *ThreadPool) Stop() { + for i := uint32(0); i < t.worker; i++ { + t.close <- 1 + } + + t.wg.Wait() + + close(t.close) + close(t.priorityCh) + 
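+	// wg.Wait above guarantees every worker has exited, so the request channels can be
+	// closed here; callers are expected to stop calling Schedule before invoking Stop.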
close(t.normalCh) +} + +// Schedule the download of a block +func (t *ThreadPool) Schedule(urgent bool, item *workItem) { + // urgent specifies the priority of this task. + // true means high priority and false means low priority + if urgent { + t.priorityCh <- item + } else { + t.normalCh <- item + } +} + +// Do is the core task to be executed by each worker thread +func (t *ThreadPool) Do(priority bool) { + defer t.wg.Done() + + if priority { + // This thread will work only on high priority channel + for { + select { + case item := <-t.priorityCh: + t.reader(item) + case <-t.close: + return + } + } + } else { + // This thread will work only on both high and low priority channel + for { + select { + case item := <-t.priorityCh: + t.reader(item) + case item := <-t.normalCh: + t.reader(item) + case <-t.close: + return + } + } + } +} diff --git a/component/block_cache/threadpool_linux_test.go b/component/block_cache/threadpool_linux_test.go new file mode 100644 index 000000000..8399fdc26 --- /dev/null +++ b/component/block_cache/threadpool_linux_test.go @@ -0,0 +1,141 @@ +//go:build linux && !authtest +// +build linux,!authtest + +/* + _____ _____ _____ ____ ______ _____ ------ + | | | | | | | | | | | | | + | | | | | | | | | | | | | + | --- | | | | |-----| |---- | | |-----| |----- ------ + | | | | | | | | | | | | | + | ____| |_____ | ____| | ____| | |_____| _____| |_____ |_____ + + + Licensed under the MIT License . + + Copyright © 2020-2023 Microsoft Corporation. All rights reserved. + Author : + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE +*/ + +package block_cache + +import ( + "sync/atomic" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +type threadPoolTestSuite struct { + suite.Suite + assert *assert.Assertions +} + +func (suite *threadPoolTestSuite) SetupTest() { +} + +func (suite *threadPoolTestSuite) cleanupTest() { +} + +func (suite *threadPoolTestSuite) TestCreate() { + suite.assert = assert.New(suite.T()) + + tp := newThreadPool(0, nil) + suite.assert.Nil(tp) + + tp = newThreadPool(1, nil) + suite.assert.Nil(tp) + + tp = newThreadPool(1, func(*workItem) {}) + suite.assert.NotNil(tp) + suite.assert.Equal(tp.worker, uint32(1)) +} + +func (suite *threadPoolTestSuite) TestStartStop() { + suite.assert = assert.New(suite.T()) + + r := func(i *workItem) { + suite.assert.Equal(i.failCnt, int32(1)) + } + + tp := newThreadPool(2, r) + suite.assert.NotNil(tp) + suite.assert.Equal(tp.worker, uint32(2)) + + tp.Start() + suite.assert.NotNil(tp.priorityCh) + suite.assert.NotNil(tp.normalCh) + + tp.Stop() +} + +func (suite *threadPoolTestSuite) TestSchedule() { + suite.assert = assert.New(suite.T()) + + r := func(i *workItem) { + suite.assert.Equal(i.failCnt, int32(1)) + } + + tp := newThreadPool(2, r) + suite.assert.NotNil(tp) + suite.assert.Equal(tp.worker, uint32(2)) + + tp.Start() + suite.assert.NotNil(tp.priorityCh) + suite.assert.NotNil(tp.normalCh) + + tp.Schedule(false, &workItem{failCnt: 1}) + tp.Schedule(true, &workItem{failCnt: 1}) + + time.Sleep(1 * time.Second) + tp.Stop() +} + +func (suite *threadPoolTestSuite) TestPrioritySchedule() { + suite.assert = assert.New(suite.T()) + + callbackCnt := int32(0) + r := func(i *workItem) { + suite.assert.Equal(i.failCnt, int32(5)) + atomic.AddInt32(&callbackCnt, 1) + } + + tp := newThreadPool(10, r) + suite.assert.NotNil(tp) + suite.assert.Equal(tp.worker, uint32(10)) + + tp.Start() + suite.assert.NotNil(tp.priorityCh) + suite.assert.NotNil(tp.normalCh) + + for i := 0; i < 100; i++ { + tp.Schedule(i < 20, &workItem{failCnt: 5}) + } + + time.Sleep(1 * time.Second) + suite.assert.Equal(callbackCnt, int32(100)) + tp.Stop() +} + +func TestThreadPoolSuite(t *testing.T) { + suite.Run(t, new(threadPoolTestSuite)) +} diff --git a/component/file_cache/cache_policy.go b/component/file_cache/cache_policy.go index f18c5ec32..3af7ec02f 100644 --- a/component/file_cache/cache_policy.go +++ b/component/file_cache/cache_policy.go @@ -44,7 +44,6 @@ import ( ) const DefaultEvictTime = 10 -const sectorSize = 4096 type cachePolicyConfig struct { tmpPath string @@ -77,12 +76,25 @@ type cachePolicy interface { // getUsagePercentage: The current cache usage as a percentage of the maxSize func getUsagePercentage(path string, maxSize float64) float64 { + var currSize float64 + var usagePercent float64 + var err error + if maxSize == 0 { - return 0 + currSize, usagePercent, err = common.GetDiskUsageFromStatfs(path) + if err != nil { + log.Err("cachePolicy::getUsagePercentage : failed to get disk usage for %s [%v]", path, err.Error) + } + } else { + // We need to compuate % usage of temp directory against configured limit + currSize, err = common.GetUsage(path) + if err != nil { + log.Err("cachePolicy::getUsagePercentage : failed to get directory usage for %s [%v]", path, err.Error) + } 
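+		// Worked example (hypothetical numbers): a temp directory currently holding
+		// 1 MB with max-size-mb set to 4 gives usagePercent = (1 / 4) * 100 = 25.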
+ + usagePercent = (currSize / float64(maxSize)) * 100 } - currSize, _ := getUsage(path) - usagePercent := (currSize / float64(maxSize)) * 100 log.Debug("cachePolicy::getUsagePercentage : current cache usage : %f%%", usagePercent) fileCacheStatsCollector.UpdateStats(stats_manager.Replace, cacheUsage, fmt.Sprintf("%f MB", currSize)) diff --git a/component/file_cache/cache_policy_linux.go b/component/file_cache/cache_policy_linux.go deleted file mode 100644 index 6167d8b51..000000000 --- a/component/file_cache/cache_policy_linux.go +++ /dev/null @@ -1,114 +0,0 @@ -//go:build linux - -/* - _____ _____ _____ ____ ______ _____ ------ - | | | | | | | | | | | | | - | | | | | | | | | | | | | - | --- | | | | |-----| |---- | | |-----| |----- ------ - | | | | | | | | | | | | | - | ____| |_____ | ____| | ____| | |_____| _____| |_____ |_____ - - - Licensed under the MIT License . - - Copyright © 2023 Seagate Technology LLC and/or its Affiliates - Copyright © 2020-2023 Microsoft Corporation. All rights reserved. - Author : - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE -*/ - -package file_cache - -import ( - "bytes" - "cloudfuse/common/log" - "fmt" - "os" - "os/exec" - "strconv" - "strings" -) - -var duPath []string = []string{"/usr/bin/du", "/usr/local/bin/du", "/usr/sbin/du", "/usr/local/sbin/du", "/sbin/du", "/bin/du"} -var selectedDuPath string = "" - -// getUsage: The current cache usage in MB -func getUsage(path string) (float64, error) { - log.Trace("cachePolicy::getCacheUsage : %s", path) - - var currSize float64 - var out bytes.Buffer - - if selectedDuPath == "" { - selectedDuPath = "-" - for _, dup := range duPath { - _, err := os.Stat(dup) - if err == nil { - selectedDuPath = dup - break - } - } - } - - if selectedDuPath == "-" { - log.Err("cachePolicy::getCacheUsage : error finding du in any configured path") - return 0, fmt.Errorf("failed to find du") - } - - // du - estimates file space usage - // https://man7.org/linux/man-pages/man1/du.1.html - // Note: We cannot just pass -BM as a parameter here since it will result in less accurate estimates of the size of the path - // (i.e. du will round up to 1M if the path is smaller than 1M). - cmd := exec.Command(selectedDuPath, "-sh", path) - cmd.Stdout = &out - - err := cmd.Run() - if err != nil { - log.Err("cachePolicy::getCacheUsage : error running du [%s]", err.Error()) - return 0, err - } - - size := strings.Split(out.String(), "\t")[0] - if size == "0" { - return 0, fmt.Errorf("failed to parse du output") - } - - // some OS's use "," instead of "." 
that will not work for float parsing - replace it - size = strings.Replace(size, ",", ".", 1) - parsed, err := strconv.ParseFloat(size[:len(size)-1], 64) - if err != nil { - log.Err("cachePolicy::getCacheUsage : error parsing folder size [%s]", err.Error()) - return 0, fmt.Errorf("failed to parse du output") - } - - switch size[len(size)-1] { - case 'K': - currSize = parsed / float64(1024) - case 'M': - currSize = parsed - case 'G': - currSize = parsed * 1024 - case 'T': - currSize = parsed * 1024 * 1024 - } - - log.Debug("cachePolicy::getCacheUsage : current cache usage : %fMB", currSize) - return currSize, nil -} diff --git a/component/file_cache/cache_policy_test.go b/component/file_cache/cache_policy_test.go index bd3e0e1f5..93b87edc4 100644 --- a/component/file_cache/cache_policy_test.go +++ b/component/file_cache/cache_policy_test.go @@ -70,8 +70,9 @@ func (suite *cachePolicyTestSuite) TestGetUsage() { f, _ := os.Create(cache_path + "/test") data := make([]byte, 1024*1024) f.Write(data) - result, _ := getUsage(cache_path) + result, _ := common.GetUsage(cache_path) suite.assert.Equal(float64(1), math.Floor(result)) + f.Close() } // We should return the sector size used. Here there should be two sectors used @@ -80,24 +81,31 @@ func (suite *cachePolicyTestSuite) TestGetUsageSizeOnDisk() { f, _ := os.Create(cache_path + "/test") data := make([]byte, 4097) f.Write(data) - result, err := getUsage(cache_path) + result, err := common.GetUsage(cache_path) suite.assert.Nil(err) // Linux du overestimates the number of sectors used by 1 sometimes // So check that we aren't more or less than 1 sector size off. - suite.assert.GreaterOrEqual(result, 2.0*sectorSize/MB) - suite.assert.LessOrEqual(result, 3.0*sectorSize/MB) + suite.assert.GreaterOrEqual(result, 2.0*common.SectorSize/MB) + suite.assert.LessOrEqual(result, 3.0*common.SectorSize/MB) } func (suite *cachePolicyTestSuite) TestGetUsagePercentage() { defer suite.cleanupTest() - f, _ := os.Create(cache_path + "/test") data := make([]byte, 1024*1024) + + f, _ := os.Create(cache_path + "/test") f.Write(data) result := getUsagePercentage(cache_path, 4) // since the value might defer a little distro to distro suite.assert.GreaterOrEqual(result, float64(25)) suite.assert.LessOrEqual(result, float64(30)) + f.Close() + + result = getUsagePercentage("/", 0) + // since the value might defer a little distro to distro + suite.assert.GreaterOrEqual(result, float64(0)) + suite.assert.LessOrEqual(result, float64(90)) } func (suite *cachePolicyTestSuite) TestDeleteFile() { @@ -105,6 +113,7 @@ func (suite *cachePolicyTestSuite) TestDeleteFile() { f, _ := os.Create(cache_path + "/test") result := deleteFile(f.Name() + "not_exist") suite.assert.Equal(nil, result) + f.Close() } func TestCachePolicyTestSuite(t *testing.T) { diff --git a/component/file_cache/file_cache.go b/component/file_cache/file_cache.go index 3280d1bd2..fde73d39e 100644 --- a/component/file_cache/file_cache.go +++ b/component/file_cache/file_cache.go @@ -313,8 +313,8 @@ func (c *FileCache) Configure(_ bool) error { log.Warn("Sync will upload current contents of file.") } - log.Info("FileCache::Configure : create-empty %t, cache-timeout %d, tmp-path %s, max-size-mb %d, high-mark %d, low-mark %d, refresh-sec %v", - c.createEmptyFile, int(c.cacheTimeout), c.tmpPath, int(cacheConfig.maxSizeMB), int(cacheConfig.highThreshold), int(cacheConfig.lowThreshold), c.refreshSec) + log.Info("FileCache::Configure : create-empty %t, cache-timeout %d, tmp-path %s, max-size-mb %d, high-mark %d, low-mark %d, 
refresh-sec %v, max-eviction %v", + c.createEmptyFile, int(c.cacheTimeout), c.tmpPath, int(cacheConfig.maxSizeMB), int(cacheConfig.highThreshold), int(cacheConfig.lowThreshold), c.refreshSec, cacheConfig.maxEviction) return nil } @@ -1204,6 +1204,15 @@ func (fc *FileCache) RenameFile(options internal.RenameFileOptions) error { } fc.policy.CachePurge(localSrcPath) + + if fc.cacheTimeout == 0 { + // Destination file needs to be deleted immediately + fc.policy.CachePurge(localDstPath) + } else { + // Add destination file to cache, it will be removed on timeout + fc.policy.CacheValid(localDstPath) + } + return nil } @@ -1211,33 +1220,45 @@ func (fc *FileCache) RenameFile(options internal.RenameFileOptions) error { func (fc *FileCache) TruncateFile(options internal.TruncateFileOptions) error { log.Trace("FileCache::TruncateFile : name=%s, size=%d", options.Name, options.Size) - flock := fc.fileLocks.Get(options.Name) - flock.Lock() - defer flock.Unlock() + // If you call truncate CLI command from shell it always sends an open call first followed by truncate + // But if you call the truncate method from a C/C++ code then open is not hit and only truncate comes - err := fc.NextComponent().TruncateFile(options) - err = fc.validateStorageError(options.Name, err, "TruncateFile", true) - if err != nil { - log.Err("FileCache::TruncateFile : %s failed to truncate [%s]", options.Name, err.Error()) - return err + var h *handlemap.Handle = nil + var err error = nil + + if options.Size == 0 { + // If size is 0 then no need to download any file we can just create an empty file + h, err = fc.CreateFile(internal.CreateFileOptions{Name: options.Name, Mode: fc.defaultPermission}) + if err != nil { + log.Err("FileCache::TruncateFile : Error creating file %s [%s]", options.Name, err.Error()) + return err + } + } else { + // If size is not 0 then we need to open the file and then truncate it + // Open will force download if file was not present in local system + h, err = fc.OpenFile(internal.OpenFileOptions{Name: options.Name, Flags: os.O_RDWR, Mode: fc.defaultPermission}) + if err != nil { + log.Err("FileCache::TruncateFile : Error opening file %s [%s]", options.Name, err.Error()) + return err + } } // Update the size of the file in the local cache localPath := common.JoinUnixFilepath(fc.tmpPath, options.Name) - info, err := os.Stat(localPath) - if err == nil || os.IsExist(err) { - fc.policy.CacheValid(localPath) + fc.policy.CacheValid(localPath) - if info.Size() != options.Size { - err = os.Truncate(localPath, options.Size) - if err != nil { - log.Err("FileCache::TruncateFile : error truncating cached file %s [%s]", localPath, err.Error()) - return err - } - } + // Truncate the file created in local system + err = os.Truncate(localPath, options.Size) + if err != nil { + log.Err("FileCache::TruncateFile : error truncating cached file %s [%s]", localPath, err.Error()) + _ = fc.CloseFile(internal.CloseFileOptions{Handle: h}) + return err } - return nil + // Mark the handle as dirty so that close of this file will force an upload + h.Flags.Set(handlemap.HandleFlagDirty) + + return fc.CloseFile(internal.CloseFileOptions{Handle: h}) } // Chmod : Update the file with its new permissions diff --git a/component/file_cache/file_cache_linux.go b/component/file_cache/file_cache_linux.go index 5d1ed4185..0b5b9e0f3 100644 --- a/component/file_cache/file_cache_linux.go +++ b/component/file_cache/file_cache_linux.go @@ -162,7 +162,7 @@ func (c *FileCache) StatFs() (*common.Statfs_t, bool, error) { if maxCacheSize == 0 { return 
nil, false, nil } - usage, _ := getUsage(c.tmpPath) + usage, _ := common.GetUsage(c.tmpPath) usage *= MB available := maxCacheSize - usage diff --git a/component/file_cache/file_cache_test.go b/component/file_cache/file_cache_test.go index 5fdf69fea..baf350677 100644 --- a/component/file_cache/file_cache_test.go +++ b/component/file_cache/file_cache_test.go @@ -423,6 +423,15 @@ func (suite *fileCacheTestSuite) TestReadDirCase3() { suite.assert.EqualValues(subdir, dir[3].Path) } +func pos(s []*internal.ObjAttr, e string) int { + for i, v := range s { + if v.Path == e { + return i + } + } + return -1 +} + func (suite *fileCacheTestSuite) TestReadDirMixed() { defer suite.cleanupTest() // Setup @@ -431,29 +440,41 @@ func (suite *fileCacheTestSuite) TestReadDirMixed() { file1 := name + "/file1" // case 1 file2 := name + "/file2" // case 2 file3 := name + "/file3" // case 3 + file4 := name + "/file4" // case 4 suite.fileCache.CreateDir(internal.CreateDirOptions{Name: name, Mode: 0777}) suite.fileCache.CreateDir(internal.CreateDirOptions{Name: subdir, Mode: 0777}) + // By default createEmptyFile is false, so we will not create these files in storage until they are closed. suite.fileCache.CreateFile(internal.CreateFileOptions{Name: file2, Mode: 0777}) suite.fileCache.TruncateFile(internal.TruncateFileOptions{Name: file2, Size: 1024}) suite.fileCache.CreateFile(internal.CreateFileOptions{Name: file3, Mode: 0777}) suite.fileCache.TruncateFile(internal.TruncateFileOptions{Name: file3, Size: 1024}) + // Create the files in fake_storage and simulate different sizes suite.loopback.CreateFile(internal.CreateFileOptions{Name: file1, Mode: 0777}) // Length is default 0 suite.loopback.CreateFile(internal.CreateFileOptions{Name: file3, Mode: 0777}) + suite.loopback.CreateFile(internal.CreateFileOptions{Name: file4, Mode: 0777}) + suite.fileCache.TruncateFile(internal.TruncateFileOptions{Name: file4, Size: 1024}) + suite.fileCache.TruncateFile(internal.TruncateFileOptions{Name: file4, Size: 0}) + // Read the Directory dir, err := suite.fileCache.ReadDir(internal.ReadDirOptions{Name: name}) suite.assert.Nil(err) suite.assert.NotEmpty(dir) - suite.assert.EqualValues(4, len(dir)) - suite.assert.EqualValues(file1, dir[0].Path) - suite.assert.EqualValues(0, dir[0].Size) - suite.assert.EqualValues(file3, dir[1].Path) - suite.assert.EqualValues(1024, dir[1].Size) - suite.assert.EqualValues(subdir, dir[2].Path) - suite.assert.EqualValues(file2, dir[3].Path) - suite.assert.EqualValues(1024, dir[3].Size) + + var i int + i = pos(dir, file1) + suite.assert.EqualValues(0, dir[i].Size) + + i = pos(dir, file3) + suite.assert.EqualValues(1024, dir[i].Size) + + i = pos(dir, file2) + suite.assert.EqualValues(1024, dir[i].Size) + + i = pos(dir, file4) + suite.assert.EqualValues(0, dir[i].Size) } func (suite *fileCacheTestSuite) TestReadDirError() { @@ -1251,6 +1272,92 @@ func (suite *fileCacheTestSuite) TestRenameFileCase2() { suite.assert.True(os.IsNotExist(err)) } +func (suite *fileCacheTestSuite) TestRenameFileAndCacheCleanup() { + defer suite.cleanupTest() + suite.cleanupTest() + + config := fmt.Sprintf("file_cache:\n path: %s\n offload-io: true\n timeout-sec: 10\n\nloopbackfs:\n path: %s", + suite.cache_path, suite.fake_storage_path) + suite.setupTestHelper(config) // setup a new file cache with a custom config (teardown will occur after the test as usual) + + src := "source1" + dst := "destination2" + createHandle, _ := suite.fileCache.CreateFile(internal.CreateFileOptions{Name: src, Mode: 0666}) + 
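+	// Closing the create handle flushes the new file to the loopback (fake) storage,
+	// which the Stat checks below rely on before the rename is issued.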
suite.fileCache.CloseFile(internal.CloseFileOptions{Handle: createHandle}) + openHandle, _ := suite.fileCache.OpenFile(internal.OpenFileOptions{Name: src, Mode: 0666}) + + // Path should be in the file cache + _, err := os.Stat(suite.cache_path + "/" + src) + suite.assert.True(err == nil || os.IsExist(err)) + // Path should be in fake storage + _, err = os.Stat(suite.fake_storage_path + "/" + src) + suite.assert.True(err == nil || os.IsExist(err)) + + // RenameFile + err = suite.fileCache.RenameFile(internal.RenameFileOptions{Src: src, Dst: dst}) + suite.assert.Nil(err) + // Path in fake storage and file cache should be updated + _, err = os.Stat(suite.cache_path + "/" + src) // Src does not exist + suite.assert.True(os.IsNotExist(err)) + _, err = os.Stat(suite.cache_path + "/" + dst) // Dst shall exists in cache + suite.assert.True(err == nil || os.IsExist(err)) + _, err = os.Stat(suite.fake_storage_path + "/" + src) // Src does not exist + suite.assert.True(os.IsNotExist(err)) + _, err = os.Stat(suite.fake_storage_path + "/" + dst) // Dst does exist + suite.assert.True(err == nil || os.IsExist(err)) + + suite.fileCache.CloseFile(internal.CloseFileOptions{Handle: openHandle}) + + time.Sleep(5 * time.Second) // Check once before the cache cleanup that file exists + _, err = os.Stat(suite.cache_path + "/" + dst) // Dst shall exists in cache + suite.assert.True(err == nil || os.IsExist(err)) + + time.Sleep(8 * time.Second) // Wait for the cache cleanup to occur + _, err = os.Stat(suite.cache_path + "/" + dst) // Dst shall not exists in cache + suite.assert.True(err == nil || os.IsNotExist(err)) +} + +func (suite *fileCacheTestSuite) TestRenameFileAndCacheCleanupWithNoTimeout() { + defer suite.cleanupTest() + suite.cleanupTest() + + config := fmt.Sprintf("file_cache:\n path: %s\n offload-io: true\n timeout-sec: 0\n\nloopbackfs:\n path: %s", + suite.cache_path, suite.fake_storage_path) + suite.setupTestHelper(config) // setup a new file cache with a custom config (teardown will occur after the test as usual) + + src := "source1" + dst := "destination2" + createHandle, _ := suite.fileCache.CreateFile(internal.CreateFileOptions{Name: src, Mode: 0666}) + suite.fileCache.CloseFile(internal.CloseFileOptions{Handle: createHandle}) + openHandle, _ := suite.fileCache.OpenFile(internal.OpenFileOptions{Name: src, Mode: 0666}) + + // Path should be in the file cache + _, err := os.Stat(suite.cache_path + "/" + src) + suite.assert.True(err == nil || os.IsExist(err)) + // Path should be in fake storage + _, err = os.Stat(suite.fake_storage_path + "/" + src) + suite.assert.True(err == nil || os.IsExist(err)) + + // RenameFile + err = suite.fileCache.RenameFile(internal.RenameFileOptions{Src: src, Dst: dst}) + suite.assert.Nil(err) + // Path in fake storage and file cache should be updated + _, err = os.Stat(suite.cache_path + "/" + src) // Src does not exist + suite.assert.True(os.IsNotExist(err)) + _, err = os.Stat(suite.cache_path + "/" + dst) // Dst shall exists in cache + suite.assert.True(err == nil || os.IsExist(err)) + _, err = os.Stat(suite.fake_storage_path + "/" + src) // Src does not exist + suite.assert.True(os.IsNotExist(err)) + _, err = os.Stat(suite.fake_storage_path + "/" + dst) // Dst does exist + suite.assert.True(err == nil || os.IsExist(err)) + + suite.fileCache.CloseFile(internal.CloseFileOptions{Handle: openHandle}) + + time.Sleep(1 * time.Second) // Wait for the cache cleanup to occur + _, err = os.Stat(suite.cache_path + "/" + dst) // Dst shall not exists in cache + 
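+	// timeout-sec is 0 in this config, so RenameFile purges the destination from the
+	// local cache immediately instead of leaving it for the eviction policy.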
suite.assert.True(err == nil || os.IsNotExist(err)) +} + func (suite *fileCacheTestSuite) TestTruncateFileNotInCache() { defer suite.cleanupTest() // Setup @@ -1323,8 +1430,9 @@ func (suite *fileCacheTestSuite) TestTruncateFileCase2() { suite.assert.EqualValues(info.Size(), size) // Path should not be in fake storage - _, err = os.Stat(common.JoinUnixFilepath(suite.fake_storage_path, path)) - suite.assert.True(os.IsNotExist(err)) + // With new changes we always download and then truncate so file will exists in local path + // _, err = os.Stat(suite.fake_storage_path + "/" + path) + // suite.assert.True(os.IsNotExist(err)) } func (suite *fileCacheTestSuite) TestZZMountPathConflict() { diff --git a/component/file_cache/file_cache_windows.go b/component/file_cache/file_cache_windows.go index 4630d2e3b..251711856 100644 --- a/component/file_cache/file_cache_windows.go +++ b/component/file_cache/file_cache_windows.go @@ -162,7 +162,7 @@ func (fc *FileCache) StatFs() (*common.Statfs_t, bool, error) { if maxCacheSize == 0 { return nil, false, nil } - usage, _ := getUsage(fc.tmpPath) + usage, _ := common.GetUsage(fc.tmpPath) available := maxCacheSize - usage var free, total, avail uint64 diff --git a/component/file_cache/lru_policy.go b/component/file_cache/lru_policy.go index 70b84578c..3ea35cbb7 100644 --- a/component/file_cache/lru_policy.go +++ b/component/file_cache/lru_policy.go @@ -40,6 +40,7 @@ import ( "sync" "time" + "cloudfuse/common" "cloudfuse/common/log" ) @@ -123,7 +124,7 @@ func (p *lruPolicy) StartPolicy() error { p.deleteEvent = make(chan string, 1000) p.validateChan = make(chan string, 10000) - _, err := getUsage(p.tmpPath) + _, err := common.GetUsage(p.tmpPath) if err == nil { p.duPresent = true } else { diff --git a/component/libfuse/libfuse.go b/component/libfuse/libfuse.go index 1742fb7b0..9f910115f 100644 --- a/component/libfuse/libfuse.go +++ b/component/libfuse/libfuse.go @@ -272,6 +272,12 @@ func (lf *Libfuse) Configure(_ bool) error { log.Err("Libfuse::Configure : config error [invalid config attributes]") return fmt.Errorf("config error in %s [invalid config attributes]", lf.Name()) } + + err = config.UnmarshalKey("lfuse", &conf) + if err != nil { + log.Err("Libfuse::Configure : config error [invalid config attributes: %s]", err.Error()) + return fmt.Errorf("config error in lfuse [invalid config attributes]") + } // Extract values from 'conf' and store them as you wish here err = config.UnmarshalKey("mount-path", &conf.mountPath) @@ -310,9 +316,9 @@ func (lf *Libfuse) Configure(_ bool) error { } log.Info("Libfuse::Configure : read-only %t, allow-other %t, allow-root %t, default-perm %d, entry-timeout %d, attr-time %d, negative-timeout %d, "+ - "ignore-open-flags: %t, nonempty %t, network-share %t", + "ignore-open-flags: %t, nonempty %t, network-share %t, direct_io %t", lf.readOnly, lf.allowOther, lf.allowRoot, lf.filePermission, lf.entryExpiration, lf.attributeExpiration, lf.negativeTimeout, - lf.ignoreOpenFlags, lf.nonEmptyMount, lf.networkShare) + lf.ignoreOpenFlags, lf.nonEmptyMount, lf.networkShare, lf.directIO) return nil } diff --git a/component/libfuse/libfuse2_handler.go b/component/libfuse/libfuse2_handler.go index 84dcec6de..659ac82a6 100644 --- a/component/libfuse/libfuse2_handler.go +++ b/component/libfuse/libfuse2_handler.go @@ -140,8 +140,10 @@ func (lf *Libfuse) initFuse() error { // direct_io option is used to bypass the kernel cache. It disables the use of // page cache (file content cache) in the kernel for the filesystem. 
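+	// When direct_io is not requested, kernel_cache is added instead so the kernel
+	// page cache can serve repeated reads without going back to the filesystem.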
- if lf.directIO { + if fuseFS.directIO { options += ",direct_io" + } else { + options += ",kernel_cache" } // Setup options as a slice @@ -364,7 +366,10 @@ func (cf *CgofuseFS) Readdir(path string, fill func(name string, stat *fuse.Stat return -fuse.EBADF } + handle.RLock() val, found := handle.GetValue("cache") + handle.RUnlock() + if !found { return -fuse.EIO } diff --git a/component/loopback/loopback_fs.go b/component/loopback/loopback_fs.go index eb1b06d85..577a7f23e 100644 --- a/component/loopback/loopback_fs.go +++ b/component/loopback/loopback_fs.go @@ -313,6 +313,17 @@ func (lfs *LoopbackFS) ReadInBuffer(options internal.ReadInBufferOptions) (int, log.Trace("LoopbackFS::ReadInBuffer : name=%s", options.Handle.Path) f := options.Handle.GetFileObject() + if f == nil { + f1, err := os.OpenFile(common.JoinUnixFilepath(lfs.path, options.Handle.Path), os.O_RDONLY, 0777) + if err != nil { + return 0, nil + } + + n, err := f1.ReadAt(options.Data, options.Offset) + f1.Close() + return n, err + } + options.Handle.RLock() defer options.Handle.RUnlock() diff --git a/go.mod b/go.mod index 0286bf63d..ac6a34e5d 100755 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( github.com/spf13/viper v1.16.0 github.com/stretchr/testify v1.8.4 github.com/winfsp/cgofuse v1.5.0 + github.com/vibhansa-msft/tlru v0.0.0-20230621165448-dbd42234ad22 go.uber.org/atomic v1.11.0 golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 golang.org/x/sys v0.11.0 @@ -37,11 +38,11 @@ require ( ) require ( - cloud.google.com/go v0.110.3 // indirect - cloud.google.com/go/compute v1.20.1 // indirect + cloud.google.com/go v0.110.7 // indirect + cloud.google.com/go/compute v1.23.0 // indirect cloud.google.com/go/compute/metadata v0.2.3 // indirect - cloud.google.com/go/iam v1.1.1 // indirect - cloud.google.com/go/storage v1.31.0 // indirect + cloud.google.com/go/iam v1.1.2 // indirect + cloud.google.com/go/storage v1.32.0 // indirect github.com/Azure/azure-storage-file-go v0.6.1-0.20201111053559-3c1754dc00a5 // indirect github.com/Azure/go-autorest v14.2.0+incompatible // indirect github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect @@ -69,10 +70,10 @@ require ( github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.3 // indirect github.com/google/go-cmp v0.5.9 // indirect - github.com/google/s2a-go v0.1.4 // indirect + github.com/google/s2a-go v0.1.5 // indirect github.com/google/uuid v1.3.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.2.5 // indirect - github.com/googleapis/gax-go/v2 v2.11.0 // indirect + github.com/googleapis/gax-go/v2 v2.12.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/hillu/go-ntdll v0.0.0-20230408164318-f8894bfa00af // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect @@ -83,7 +84,7 @@ require ( github.com/mattn/go-ieproxy v0.0.11 // indirect github.com/minio/minio-go v6.0.14+incompatible // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect - github.com/pelletier/go-toml/v2 v2.0.8 // indirect + github.com/pelletier/go-toml/v2 v2.0.9 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pkg/xattr v0.4.9 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect @@ -93,25 +94,26 @@ require ( github.com/spf13/afero v1.9.5 // indirect github.com/spf13/cast v1.5.1 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect - github.com/subosito/gotenv v1.4.2 // indirect + github.com/subosito/gotenv v1.6.0 // indirect github.com/tklauser/go-sysconf v0.3.11 // 
indirect github.com/tklauser/numcpus v0.6.0 // indirect github.com/wastore/keychain v0.0.0-20180920053336-f2c902a3d807 // indirect github.com/wastore/keyctl v0.3.1 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect go.opencensus.io v0.24.0 // indirect - golang.org/x/crypto v0.10.0 // indirect - golang.org/x/net v0.11.0 // indirect - golang.org/x/oauth2 v0.9.0 // indirect + golang.org/x/crypto v0.12.0 // indirect + golang.org/x/net v0.14.0 // indirect + golang.org/x/oauth2 v0.11.0 // indirect golang.org/x/sync v0.3.0 // indirect - golang.org/x/text v0.10.0 // indirect + golang.org/x/sys v0.11.0 // indirect + golang.org/x/text v0.12.0 // indirect golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect - google.golang.org/api v0.129.0 // indirect + google.golang.org/api v0.138.0 // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20230628200519-e449d1ea0e82 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20230628200519-e449d1ea0e82 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20230628200519-e449d1ea0e82 // indirect - google.golang.org/grpc v1.56.1 // indirect + google.golang.org/genproto v0.0.0-20230815205213-6bfd019c3878 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20230815205213-6bfd019c3878 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230815205213-6bfd019c3878 // indirect + google.golang.org/grpc v1.57.0 // indirect google.golang.org/protobuf v1.31.0 // indirect ) diff --git a/go.sum b/go.sum index 9b42b99b9..1ca992f65 100644 --- a/go.sum +++ b/go.sum @@ -17,22 +17,22 @@ cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHOb cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI= cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk= cloud.google.com/go v0.75.0/go.mod h1:VGuuCn7PG0dwsd5XPVm2Mm3wlh3EL55/79EKB6hlPTY= -cloud.google.com/go v0.110.3 h1:wwearW+L7sAPSomPIgJ3bVn6Ck00HGQnn5HMLwf0azo= -cloud.google.com/go v0.110.3/go.mod h1:+EYjdK8e5RME/VY/qLCAtuyALQ9q67dvuum8i+H5xsI= +cloud.google.com/go v0.110.7 h1:rJyC7nWRg2jWGZ4wSJ5nY65GTdYJkg0cd/uXb+ACI6o= +cloud.google.com/go v0.110.7/go.mod h1:+EYjdK8e5RME/VY/qLCAtuyALQ9q67dvuum8i+H5xsI= cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= -cloud.google.com/go/compute v1.20.1 h1:6aKEtlUiwEpJzM001l0yFkpXmUVXaN8W+fbkb2AZNbg= -cloud.google.com/go/compute v1.20.1/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= +cloud.google.com/go/compute v1.23.0 h1:tP41Zoavr8ptEqaW6j+LQOnyBBhO7OkOMAGrgLopTwY= +cloud.google.com/go/compute v1.23.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= cloud.google.com/go/datastore v1.1.0/go.mod 
h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= -cloud.google.com/go/iam v1.1.1 h1:lW7fzj15aVIXYHREOqjRBV9PsH0Z6u8Y46a1YGvQP4Y= -cloud.google.com/go/iam v1.1.1/go.mod h1:A5avdyVL2tCppe4unb0951eI9jreack+RJ0/d+KUZOU= +cloud.google.com/go/iam v1.1.2 h1:gacbrBdWcoVmGLozRuStX45YKvJtzIjJdAolzUs1sm4= +cloud.google.com/go/iam v1.1.2/go.mod h1:A5avdyVL2tCppe4unb0951eI9jreack+RJ0/d+KUZOU= cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= @@ -43,8 +43,8 @@ cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohl cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo= -cloud.google.com/go/storage v1.31.0 h1:+S3LjjEN2zZ+L5hOwj4+1OkGCsLVe0NzpXKQ1pSdTCI= -cloud.google.com/go/storage v1.31.0/go.mod h1:81ams1PrhW16L4kF7qg+4mTq7SRs5HsbDTM0bWvrwJ0= +cloud.google.com/go/storage v1.32.0 h1:5w6DxEGOnktmJHarxAOUywxVW9lbNWIzlzzUltG/3+o= +cloud.google.com/go/storage v1.32.0/go.mod h1:Hhh/dogNRGca7IWv1RC2YqEn0c0G77ctA/OxflYkiD8= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/Azure/azure-pipeline-go v0.2.3/go.mod h1:x841ezTBIMG6O3lAcl8ATHnsOPVl2bqk7S3ta6S6u4k= github.com/Azure/azure-pipeline-go v0.2.4-0.20220425205405-09e6f201e1e4 h1:hDJImUzpTAeIw/UasFUUDB/+UsZm5Q/6x2/jKKvEUiw= @@ -228,8 +228,8 @@ github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= -github.com/google/s2a-go v0.1.4 h1:1kZ/sQM3srePvKs3tXAvQzo66XfcReoqFpIpIccE7Oc= -github.com/google/s2a-go v0.1.4/go.mod h1:Ej+mSEMGRnqRzjc7VtF+jdBwYG5fuJfiZ8ELkjEwM0A= +github.com/google/s2a-go v0.1.5 h1:8IYp3w9nysqv3JH+NJgXJzGbDHzLOTj43BmSkp+O7qg= +github.com/google/s2a-go v0.1.5/go.mod h1:Ej+mSEMGRnqRzjc7VtF+jdBwYG5fuJfiZ8ELkjEwM0A= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= @@ -238,8 +238,8 @@ github.com/googleapis/enterprise-certificate-proxy v0.2.5 h1:UR4rDjcgpgEnqpIEvki github.com/googleapis/enterprise-certificate-proxy v0.2.5/go.mod h1:RxW0N9901Cko1VOCW3SXCpWP+mlIEkk2tP7jnHy9a3w= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= -github.com/googleapis/gax-go/v2 v2.11.0 h1:9V9PWXEsWnPpQhu/PeQIkS4eGzMlTLGgt80cUUI8Ki4= -github.com/googleapis/gax-go/v2 v2.11.0/go.mod h1:DxmR61SGKkGLa2xigwuZIQpkCI2S5iydzRfb3peWZJI= +github.com/googleapis/gax-go/v2 v2.12.0 h1:A+gCJKdRfqXkr+BIRGtZLibNXf0m1f9E4HG56etFpas= +github.com/googleapis/gax-go/v2 v2.12.0/go.mod h1:y+aIqrI5eb1YGMVJfuV3185Ts/D7qKpsEkdD5+I6QGU= github.com/googleapis/google-cloud-go-testing 
v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= @@ -287,8 +287,8 @@ github.com/montanaflynn/stats v0.6.6/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= -github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ= -github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= +github.com/pelletier/go-toml/v2 v2.0.9 h1:uH2qQXheeefCCkuBBSLi7jCiSmj3VRh2+Goq2N7Xxu0= +github.com/pelletier/go-toml/v2 v2.0.9/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg= @@ -336,15 +336,16 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/subosito/gotenv v1.4.2 h1:X1TuBLAMDFbaTAChgCBLu3DU3UPyELpnF2jjJ2cz/S8= -github.com/subosito/gotenv v1.4.2/go.mod h1:ayKnFf/c6rvx/2iiLrJUk1e6plDbT3edrFNGqEflhK0= +github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= +github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= github.com/tklauser/numcpus v0.6.0 h1:kebhY2Qt+3U6RNK7UqpYNA+tJ23IBEGKkB7JQBfDYms= github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4= +github.com/vibhansa-msft/tlru v0.0.0-20230621165448-dbd42234ad22 h1:PwqTV6gPLW3m7H47Wv0xkj47OLPnoOoFzvPInqv9mAo= +github.com/vibhansa-msft/tlru v0.0.0-20230621165448-dbd42234ad22/go.mod h1:7G2C64UXEWNr8oUzspzcrymxCjD9fKAKTGbL7zO2GW8= github.com/wastore/keychain v0.0.0-20180920053336-f2c902a3d807 h1:Uzh85j0tl46Sf2OOx1wDePSWkz3Eq8XdCFkLXqaX8Bg= github.com/wastore/keychain v0.0.0-20180920053336-f2c902a3d807/go.mod h1:zI8umr7xnBSyT9ZJ8wn48RiQ0EWXo4xmYLNw9FQvC9w= github.com/wastore/keyctl v0.3.1 h1:wMkYW9y9jGbQ1ARBLGLwnDdbgrkbuSeuIQeHy+BZOU0= @@ -382,8 +383,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20220314234659-1baeb1ce4c0b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.0.0-20220722155217-630584e8d5aa/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= 
golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= -golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM= -golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I= +golang.org/x/crypto v0.12.0 h1:tFM/ta59kqch6LlvYnPa0yx5a83cL2nHflFhYKvv9Yk= +golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -461,8 +462,8 @@ golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.11.0 h1:Gi2tvZIJyBtO9SDr1q9h5hEQCp/4L2RQ+ar0qjx2oNU= -golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ= +golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14= +golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -472,8 +473,8 @@ golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= -golang.org/x/oauth2 v0.9.0 h1:BPpt2kU7oMRq3kCHAA1tbSEshXRw1LpG2ztgDwrzuAs= -golang.org/x/oauth2 v0.9.0/go.mod h1:qYgFZaFiu6Wg24azG8bdV52QJXJGbZzIIsRCdVKzbLw= +golang.org/x/oauth2 v0.11.0 h1:vPL4xzxBM4niKCW6g9whtaWVXTJf1U5e4aZxxFx/gbU= +golang.org/x/oauth2 v0.11.0/go.mod h1:LdF7O/8bLR/qWK9DrpXmbHLTouvRHK0SgJl0GmDBchk= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -551,8 +552,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58= -golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc= +golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod 
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -630,8 +631,8 @@ google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz513 google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg= google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE= google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8= -google.golang.org/api v0.129.0 h1:2XbdjjNfFPXQyufzQVwPf1RRnHH8Den2pfNE2jw7L8w= -google.golang.org/api v0.129.0/go.mod h1:dFjiXlanKwWE3612X97llhsoI36FAoIiRj3aTl5b/zE= +google.golang.org/api v0.138.0 h1:K/tVp05MxNVbHShRw9m7e9VJGdagNeTdMzqPH7AUqr0= +google.golang.org/api v0.138.0/go.mod h1:4xyob8CxC+0GChNBvEUAk8VBKNvYOTWM9T3v3UfRxuY= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -677,12 +678,12 @@ google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6D google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210108203827-ffc7fda8c3d7/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= google.golang.org/genproto v0.0.0-20210226172003-ab064af71705/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= -google.golang.org/genproto v0.0.0-20230628200519-e449d1ea0e82 h1:Wdfp5Hc1bqGCWYZNrir4A1Jb+SmVaV2j1DL/pbMMTGI= -google.golang.org/genproto v0.0.0-20230628200519-e449d1ea0e82/go.mod h1:xZnkP7mREFX5MORlOPEzLMr+90PPZQ2QWzrVTWfAq64= -google.golang.org/genproto/googleapis/api v0.0.0-20230628200519-e449d1ea0e82 h1:iI5Fmsfz4zDINYxJLxn2YChI//ypkHM/KuVSvlN7ZXk= -google.golang.org/genproto/googleapis/api v0.0.0-20230628200519-e449d1ea0e82/go.mod h1:vHYtlOoi6TsQ3Uk2yxR7NI5z8uoV+3pZtR4jmHIkRig= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230628200519-e449d1ea0e82 h1:6b+zGQBiXFlAMpQr+cCarAdrZD4QgXSG7uUZadYysgg= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230628200519-e449d1ea0e82/go.mod h1:66JfowdXAEgad5O9NnYcsNPLCPZJD++2L9X0PCMODrA= +google.golang.org/genproto v0.0.0-20230815205213-6bfd019c3878 h1:Iveh6tGCJkHAjJgEqUQYGDGgbwmhjoAOz8kO/ajxefY= +google.golang.org/genproto v0.0.0-20230815205213-6bfd019c3878/go.mod h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4= +google.golang.org/genproto/googleapis/api v0.0.0-20230815205213-6bfd019c3878 h1:WGq4lvB/mlicysM/dUT3SBvijH4D3sm/Ny1A4wmt2CI= +google.golang.org/genproto/googleapis/api v0.0.0-20230815205213-6bfd019c3878/go.mod h1:KjSP20unUpOx5kyQUFa7k4OJg0qeJ7DEZflGDu2p6Bk= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230815205213-6bfd019c3878 h1:lv6/DhyiFFGsmzxbsUUTOkN29II+zeWHxvT8Lpdxsv0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230815205213-6bfd019c3878/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= @@ -702,8 +703,8 @@ google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA5 google.golang.org/grpc 
v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= -google.golang.org/grpc v1.56.1 h1:z0dNfjIl0VpaZ9iSVjA6daGatAYwPGstTjt5vkRMFkQ= -google.golang.org/grpc v1.56.1/go.mod h1:I9bI3vqKfayGqPUAwGdOSu7kt6oIJLixfffKrpXqQ9s= +google.golang.org/grpc v1.57.0 h1:kfzNeI/klCGD2YPMUlaGNT3pxvYfga7smW3Vth8Zsiw= +google.golang.org/grpc v1.57.0/go.mod h1:Sd+9RMTACXwmub0zcNY2c4arhtrbBYD1AUHI/dt16Mo= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= diff --git a/go_installer.sh b/go_installer.sh index 4f8cac7de..6cb96c3b4 100755 --- a/go_installer.sh +++ b/go_installer.sh @@ -1,10 +1,15 @@ #!/bin/bash work_dir=$(echo $1 | sed 's:/*$::') -if go version | grep -q "$2"; then - echo "Exists" -else - wget "https://golang.org/dl/go$2.linux-amd64.tar.gz" -P "$work_dir" - sudo rm -rf /usr/local/go - sudo tar -C /usr/local -xzf "$work_dir"/go"$2".linux-amd64.tar.gz - sudo ln -sf /usr/local/go/bin/go /usr/bin/go +version="1.20.5" +arch=`hostnamectl | grep "Arch" | rev | cut -d " " -f 1 | rev` + +if [ $arch != "arm64" ] +then + arch="amd64" fi + +echo "Installing on : " $arch " Version : " $version +wget "https://golang.org/dl/go$version.linux-$arch.tar.gz" -P "$work_dir" +sudo rm -rf /usr/local/go +sudo tar -C /usr/local -xzf "$work_dir"/go"$version".linux-$arch.tar.gz +sudo ln -sf /usr/local/go/bin/go /usr/bin/go diff --git a/internal/handlemap/handle_map.go b/internal/handlemap/handle_map.go index 22137322c..11a45ac36 100644 --- a/internal/handlemap/handle_map.go +++ b/internal/handlemap/handle_map.go @@ -35,6 +35,7 @@ package handlemap import ( + "container/list" "os" "sync" "time" @@ -66,10 +67,16 @@ type Cache struct { HandleCount int64 } +type Buffers struct { + Cooked *list.List + Cooking *list.List +} + type Handle struct { sync.RWMutex FObj *os.File // File object being represented by this handle CacheObj *Cache // Streaming layer cache for this handle + Buffers *Buffers ID HandleID // Blobfuse assigned unique ID to this handle Size int64 // Size of the file being handled here Mtime time.Time @@ -90,6 +97,7 @@ func NewHandle(path string) *Handle { values: make(map[string]interface{}), CacheObj: nil, FObj: nil, + Buffers: nil, } } @@ -125,33 +133,27 @@ func (handle *Handle) FD() int { // SetValue : Store user defined parameter inside handle func (handle *Handle) SetValue(key string, value interface{}) { - handle.Lock() handle.values[key] = value - handle.Unlock() } // GetValue : Retrieve user defined parameter from handle func (handle *Handle) GetValue(key string) (interface{}, bool) { - handle.RLock() val, ok := handle.values[key] - handle.RUnlock() return val, ok } -// RemoveValue : Delete user defined parameter from handle -func (handle *Handle) RemoveValue(key string) { - handle.Lock() +// GetValue : Retrieve user defined parameter from handle +func (handle *Handle) RemoveValue(key string) (interface{}, bool) { + val, ok := handle.values[key] delete(handle.values, key) - handle.Unlock() + return val, ok } // Cleanup : Delete all user defined parameter from handle func (handle *Handle) Cleanup() { - handle.Lock() for key := range 
handle.values { delete(handle.values, key) } - handle.Unlock() } // defaultHandleMap holds a synchronized map[ HandleID ]*Handle diff --git a/setup/baseConfig.yaml b/setup/baseConfig.yaml index 41761ca3d..2a42c8267 100644 --- a/setup/baseConfig.yaml +++ b/setup/baseConfig.yaml @@ -53,6 +53,7 @@ logging: components: - libfuse - stream + - block_cache - file_cache - attr_cache - azstorage @@ -80,6 +81,17 @@ stream: buffer-size-mb: file-caching: +# Block cache related configuration. Block cache is only supported on Linux. +block_cache: + block-size-mb: + mem-size-mb: + path: + disk-size-mb: + disk-timeout-sec: + prefetch: + parallelism: + prefetch-on-open: true|false + # Disk cache related configuration file_cache: # Required diff --git a/test/benchmark_test/fio.cfg b/test/benchmark_test/fio.cfg new file mode 100644 index 000000000..53f0ae36b --- /dev/null +++ b/test/benchmark_test/fio.cfg @@ -0,0 +1,13 @@ +# to use this config use 'fio ./test/benchmark_test/fio.cfg' + +[global] +ioengine=sync +size=15G +bs=16M +rw=randread +#rw=read +filename= /usr/blob_mnt/testFile15GB +numjobs=20 + +[job] +name=seq_read \ No newline at end of file diff --git a/test/longhaul/longhaul.sh b/test/longhaul/longhaul.sh index ba6f5c903..b3710b144 100755 --- a/test/longhaul/longhaul.sh +++ b/test/longhaul/longhaul.sh @@ -1,4 +1,10 @@ +# To create ramdisk +# sudo mkdir -p /mnt/ramdisk +# sudo chmod 777 /mnt/ramdisk +# sudo mount -t tmpfs -o rw,size=4G tmpfs /mnt/ramdisk + + SERVICE="cloudfuse" SCRIPT="longhaul.sh" diff --git a/test/scripts/fio.sh b/test/scripts/fio.sh old mode 100644 new mode 100755 diff --git a/test/scripts/fio/random_read_20T_100G.fio b/test/scripts/fio/random_read_20T_100G.fio new file mode 100755 index 000000000..ca9ca5a5b --- /dev/null +++ b/test/scripts/fio/random_read_20T_100G.fio @@ -0,0 +1,9 @@ +[global] +ioengine=sync +size=100G +bs=32M +rw=randread +filename=/tmp/mntpoint/sample100G +numjobs=20 +[job] +name=seq_read \ No newline at end of file diff --git a/test/scripts/fio/random_read_4T_100G.fio b/test/scripts/fio/random_read_4T_100G.fio new file mode 100755 index 000000000..dab713fe6 --- /dev/null +++ b/test/scripts/fio/random_read_4T_100G.fio @@ -0,0 +1,9 @@ +[global] +ioengine=sync +size=100G +bs=32M +rw=randread +filename=/tmp/mntpoint/sample100G +numjobs=4 +[job] +name=seq_read \ No newline at end of file diff --git a/test/scripts/fio/random_read_4T_4G.fio b/test/scripts/fio/random_read_4T_4G.fio new file mode 100755 index 000000000..7f8f500d0 --- /dev/null +++ b/test/scripts/fio/random_read_4T_4G.fio @@ -0,0 +1,9 @@ +[global] +ioengine=sync +size=4G +bs=32M +rw=randread +filename=/tmp/mntpoint/sample4G +numjobs=4 +[job] +name=seq_read \ No newline at end of file diff --git a/test/scripts/fio/sequential_read_128T_100G.fio b/test/scripts/fio/sequential_read_128T_100G.fio new file mode 100755 index 000000000..b1b30a373 --- /dev/null +++ b/test/scripts/fio/sequential_read_128T_100G.fio @@ -0,0 +1,9 @@ +[global] +ioengine=sync +size=100G +bs=32M +rw=read +filename=/tmp/mntpoint/sample100G +numjobs=128 +[job] +name=seq_read \ No newline at end of file diff --git a/test/scripts/fio/sequential_read_1T_4G.fio b/test/scripts/fio/sequential_read_1T_4G.fio new file mode 100755 index 000000000..595717de5 --- /dev/null +++ b/test/scripts/fio/sequential_read_1T_4G.fio @@ -0,0 +1,9 @@ +[global] +ioengine=sync +size=4G +bs=512M +rw=randread +filename=/tmp/mntpoint/sample4G +numjobs=1 +[job] +name=seq_read \ No newline at end of file diff --git a/test/scripts/fio/sequential_read_20T_100G.fio 
b/test/scripts/fio/sequential_read_20T_100G.fio new file mode 100755 index 000000000..fc98669d1 --- /dev/null +++ b/test/scripts/fio/sequential_read_20T_100G.fio @@ -0,0 +1,9 @@ +[global] +ioengine=sync +size=100G +bs=32M +rw=read +filename=/tmp/mntpoint/sample100G +numjobs=20 +[job] +name=seq_read \ No newline at end of file diff --git a/test/scripts/fio/sequential_read_4T_100G.fio b/test/scripts/fio/sequential_read_4T_100G.fio new file mode 100755 index 000000000..5d61c2c1e --- /dev/null +++ b/test/scripts/fio/sequential_read_4T_100G.fio @@ -0,0 +1,9 @@ +[global] +ioengine=sync +size=100G +bs=32M +rw=read +filename=/tmp/mntpoint/sample100G +numjobs=4 +[job] +name=seq_read \ No newline at end of file diff --git a/test/scripts/fio/sequential_read_4T_4G.fio b/test/scripts/fio/sequential_read_4T_4G.fio new file mode 100755 index 000000000..0a227fff2 --- /dev/null +++ b/test/scripts/fio/sequential_read_4T_4G.fio @@ -0,0 +1,9 @@ +[global] +ioengine=sync +size=4G +bs=32M +rw=read +filename=/tmp/mntpoint/sample4G +numjobs=4 +[job] +name=seq_read \ No newline at end of file diff --git a/test/scripts/latency_hiding.jl b/test/scripts/latency_hiding.jl new file mode 100755 index 000000000..b0f2a6ca8 --- /dev/null +++ b/test/scripts/latency_hiding.jl @@ -0,0 +1,43 @@ +using TickTock +using .Libc + +sleep_time = parse(Float64, ARGS[1]) + +block_size = 64 * 1024 * 1024 #bytes +file_path = "/tmp/mntpoint1/sample31" + +block_count = Int(floor(filesize(file_path) / block_size)) +in_data = open(file_path, "r") +buffer = Array{UInt8, 1}(undef, block_size) +computation_time = 0.0 +runtime = 0.0 +readtime = 0.0 +dummy = 0 +println("SleepTime: ", sleep_time) +println("Bytewise XOR of ", Int(block_count), " blocks of size ", Int(block_size), "B") + +tick() #start timer +for i = 1:block_count + global dummy, computation_time, runtime, readtime, sleep_time + t0 = time_ns() + read!(in_data, buffer) + t1 = time_ns() + dummy = xor(dummy, reduce(xor, buffer)) + Libc.systemsleep(sleep_time) #additional calculation time in seconds + t2 = time_ns() + readtime += t1-t0 + computation_time += t2-t1 + println("Instantaneous readTime: ", t1-t0, "ns") + runtime += t2-t0 +end +tock() #stop timer and output time difference + +close(in_data) +println("cummalative_read_time=", readtime/1.0e9, "s") +println("cummalative_computation_time=", computation_time/1.0e9, "s") +println("cummalative_runtime=", runtime/1.0e9, "s") +println("xor=", dummy) #output dummy to keep JIT compiler from removing xor and read + +timing_output = open("latency_hiding.dat", "a") +write(timing_output, string(runtime/1.0e9) * " " * string(computation_time/1.0e9) * " " * string(readtime/1.0e9) * "\n") +close(timing_output) \ No newline at end of file diff --git a/test/scripts/parallel_batch_read.py b/test/scripts/parallel_batch_read.py new file mode 100755 index 000000000..0ad7fb520 --- /dev/null +++ b/test/scripts/parallel_batch_read.py @@ -0,0 +1,83 @@ + +import os +import threading +import hashlib +import datetime +import random +# Directory containing the files + +mountroot = "/tmp/mntpoint1" +mountpath = "/smallfiles/createfiles" +data_dir = mountroot + mountpath + +# Number of files to read in each batch +batch_size = 10000 + +# block size of the read requests +blockSize = 8 * 1024 * 1024 + +def read_batch(batch): + for filename in batch: + open_start_time = datetime.datetime.now() + print("filename: "+ filename + "PID: " + str(os.getpid()) + "TID: "+ str(threading.get_native_id())) + + f1 = "" + with open(os.path.join(data_dir, filename), "rb") as f: 
+ open_end_time = datetime.datetime.now() - open_start_time + + start_time = datetime.datetime.now() + size = 0 + cnt = 0 + while True: + cnt += 1 + data = f.read(blockSize) + + # Needed for data validation, increases time taken so remove if not needed + f1+=str(data) + size += len(data) + if not data: + break + + end_time = datetime.datetime.now() + print(filename + ":readtime:" + str(end_time - start_time) + ":opentime:" + str(open_end_time)+ ":openstarttime:" + str(open_start_time) + "size: " + str(size)) + f.close() + + hash = hashlib.md5(f1.encode()).hexdigest() + print(hash) + +# Number of iterations to run +count = 1 +while count>0: + count -= 1 + + # list files and calculate list time + start_time = datetime.datetime.now() + filenames = os.listdir(data_dir) + + end_time = datetime.datetime.now() + print("FileListTime: " + str(end_time - start_time)) + + # for local testing use by name + #filenames = ["sample31", "sample31"] + + random.shuffle(filenames) + + # create batches and threads + batches = [filenames[i:i+batch_size] for i in range(0, len(filenames), batch_size)] + print(len(batches)) + + threads = [] + for batch in batches: + t = threading.Thread(target=read_batch, args=(batch,)) + threads.append(t) + + start_time = datetime.datetime.now() + + for t in threads: + t.start() + + for t in threads: + t.join() + + end_time = datetime.datetime.now() + print("FileOpenReadCloseTime: " + end_time - start_time) \ No newline at end of file diff --git a/test/scripts/profile-latency-hiding.sh b/test/scripts/profile-latency-hiding.sh new file mode 100755 index 000000000..a80949440 --- /dev/null +++ b/test/scripts/profile-latency-hiding.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Needs to be run with sudo +# This scripts adds a computation time after reading a block, this simulates real training scenarios. +# Due to prefetch, we should hide actual network latency due to prefetching in the background. + +set -euxo pipefail + +BLOBFUSE2_DIR="/tmp/mntpoint" +RAMDISK_DIR="/tmp/cache" + +# CLean up cache path +rm -rf $RAMDISK_DIR/* + +for i in `seq 0 40`; do + fusermount -u $BLOBFUSE2_DIR + + # note: blobfuse2 install see + # https://learn.microsoft.com/en-us/azure/storage/blobs/blobfuse2-how-to-deploy#configure-the-microsoft-package-repository + + # mounts + ./blobfuse2 mount /tmp/mntpoint --config-file=./config.yml -o ro + + # clear caches to be sure + sync + echo 3 > /proc/sys/vm/drop_caches + + # sometimes blobfuse terminates without error but reports no files in mount dir for a second + sleep 1 + + t="`awk \"BEGIN {print ($i*0.5)}\"`" + julia latency_hiding.jl $t +done \ No newline at end of file diff --git a/test/scripts/readwrite.c b/test/scripts/readwrite.c old mode 100644 new mode 100755 diff --git a/test/test_utils/dir_list_seek.c b/test/test_utils/dir_list_seek.c new file mode 100644 index 000000000..2938a5389 --- /dev/null +++ b/test/test_utils/dir_list_seek.c @@ -0,0 +1,66 @@ +#include +#include +#include +#include +#include +#include +#include + +#define handle_error(msg) \ + do { perror(msg); exit(EXIT_FAILURE); } while (0) + +// To compile this file use "gcc dir_list_seek.c -o dir_list_seek" +// To run this util use "./dir_list_seek 1 ? 
argv[1] : ".", O_RDONLY | O_DIRECTORY); + if(fd == -1) + handle_error("open"); + + for(int i = 0; i < 10; i++) { + lseek(fd, 0, SEEK_SET); + nread = syscall(SYS_getdents, fd, buf, BUF_SIZE); + if(nread == -1) + handle_error("getdents"); + + if(nread == 0) + break; + + printf("--------------- nread=%d ---------------\n", nread); + printf("i-node# file type d_reclen d_off d_name\n"); + + for(bpos=0; bposd_ino); + d_type = *(buf + bpos + d->d_reclen - 1); + printf("%-10s ", (d_type == DT_REG) ? "regular" : + (d_type == DT_DIR) ? "directory" : + (d_type == DT_FIFO) ? "FIFO" : + (d_type == DT_SOCK) ? "socket" : + (d_type == DT_LNK) ? "symlink" : + (d_type == DT_BLK) ? "block dev" : + (d_type == DT_CHR) ? "char dev" : "???"); + printf("%4d %10lld %s\n", d->d_reclen, + (long long) d->d_off, d->d_name); + bpos += d->d_reclen; + } + } + + return 0; +} \ No newline at end of file diff --git a/testdata/config/azure_key_bc.yaml b/testdata/config/azure_key_bc.yaml new file mode 100644 index 000000000..cd01c7181 --- /dev/null +++ b/testdata/config/azure_key_bc.yaml @@ -0,0 +1,41 @@ +logging: + level: log_debug + file-path: "blobfuse2-logs.txt" + type: base + +components: + - libfuse + - block_cache + - attr_cache + - azstorage + +libfuse: + attribute-expiration-sec: 0 + entry-expiration-sec: 0 + negative-entry-expiration-sec: 0 + ignore-open-flags: true + +block_cache: + block-size-mb: 8 + mem-size-mb: 4192 + + path: block_cache + disk-size-mb: 4192 + disk-timeout-sec: 120 + + prefetch: 12 + parallelism: 128 + +attr_cache: + timeout-sec: 3600 + +azstorage: + type: { ACCOUNT_TYPE } + endpoint: { ACCOUNT_ENDPOINT } + use-http: { USE_HTTP } + account-name: { NIGHTLY_STO_ACC_NAME } + account-key: { NIGHTLY_STO_ACC_KEY } + mode: key + container: { 0 } + tier: hot + sdk-trace: { VERBOSE_LOG } diff --git a/tools/health-monitor/internal/stats_export.go b/tools/health-monitor/internal/stats_export.go index 2fa83bdd3..ec16251fb 100644 --- a/tools/health-monitor/internal/stats_export.go +++ b/tools/health-monitor/internal/stats_export.go @@ -63,7 +63,7 @@ type StatsExporter struct { type Output struct { Timestamp string `json:"Timestamp,omitempty"` - Cfs []stats_manager.PipeMsg `json:"BlobfuseStats,omitempty"` + Cfs []stats_manager.PipeMsg `json:"CloudfuseStats,omitempty"` FcEvent []*hmcommon.CacheEvent `json:"FileCache,omitempty"` Cpu string `json:"CPUUsage,omitempty"` Mem string `json:"MemoryUsage,omitempty"` From a4f576d18d79e981590b41dfeba1f7d37afb403e Mon Sep 17 00:00:00 2001 From: Michael Habinsky Date: Tue, 12 Sep 2023 22:28:06 +0000 Subject: [PATCH 3/3] Merged PR 71: Allow AWS Default Authentication Methods Allow AWS SDK to use .aws/credentials file and/or environment variables to authenticate with S3 cloud storage. 
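Roughly, credential resolution now behaves like the sketch below. This is an illustrative, self-contained example rather than the patch's own code (the helper name newS3Client and its arguments are stand-ins for values read from the cloudfuse config file): a static key-id/secret-key pair from the config file still wins when both are present; otherwise config.LoadDefaultConfig falls back to the SDK's default chain, which checks AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY (and related variables) and then the shared ~/.aws/credentials and ~/.aws/config files.

package main

import (
	"context"
	"log"

	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
)

// newS3Client is illustrative: keyID, secretKey, and profile stand in for the
// values read from the cloudfuse config file.
func newS3Client(ctx context.Context, keyID, secretKey, profile string) (*s3.Client, error) {
	var opts []func(*config.LoadOptions) error
	if profile != "" {
		// honor a named profile from the shared config files, if one was set
		opts = append(opts, config.WithSharedConfigProfile(profile))
	}
	if keyID != "" && secretKey != "" {
		// credentials given in the config file take precedence over everything else
		opts = append(opts, config.WithCredentialsProvider(
			credentials.NewStaticCredentialsProvider(keyID, secretKey, "")))
	}
	// with no explicit provider, LoadDefaultConfig falls back to environment
	// variables and then to ~/.aws/credentials and ~/.aws/config
	cfg, err := config.LoadDefaultConfig(ctx, opts...)
	if err != nil {
		return nil, err
	}
	return s3.NewFromConfig(cfg), nil
}

func main() {
	// empty strings mean "nothing in the config file", so the default chain is used
	client, err := newS3Client(context.Background(), "", "", "")
	if err != nil {
		log.Fatal(err)
	}
	_ = client
}

The component itself wires the same load options into its existing Configure path rather than a free helper like this one.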
--- README.md | 372 +++++++++++++++---------- component/s3storage/client.go | 49 ++-- component/s3storage/client_test.go | 48 +++- component/s3storage/config.go | 12 +- component/s3storage/config_test.go | 20 +- component/s3storage/connection.go | 9 +- component/s3storage/s3storage.go | 5 +- component/s3storage/s3storage_test.go | 12 +- component/s3storage/s3wrappers_test.go | 6 +- setup/baseConfig.yaml | 7 +- 10 files changed, 327 insertions(+), 213 deletions(-) diff --git a/README.md b/README.md index 8397bde6d..90598d27c 100755 --- a/README.md +++ b/README.md @@ -1,16 +1,20 @@ # Cloudfuse - An S3 and Azure Storage FUSE driver ## About -Cloudfuse is a fork of the open source project [blobfuse2](https://github.com/Azure/azure-storage-fuse) from Microsoft -that adds support for S3 storage, a GUI for configuration and mounting, and Windows support. It provides a virtual -filesystem backed by either S3 or Azure Storage. It uses the libfuse open source library (fuse) to communicate with the -Linux FUSE kernel module and uses WinFSP to support running on Windows. It implements the filesystem operations using -the S3 and Azure Storage REST APIs. - - -Cloudfuse is stable, provided that it is used within its limits documented here. Cloudfuse supports both reads and -writes, however, it does not guarantee continuous sync of data written to storage using other APIs or other mounts of -Cloudfuse. For data integrity it is recommended that multiple sources do not modify the same blob/object/file. Please -submit an issue [here]() for any issues/feature requests/questions. +Cloudfuse is a fork of the open source project +[blobfuse2](https://github.com/Azure/azure-storage-fuse) from Microsoft that +adds support for S3 storage, a GUI for configuration and mounting, and Windows +support. It provides a virtual filesystem backed by either S3 or Azure Storage. +It uses the libfuse open source library (fuse) to communicate with the Linux +FUSE kernel module and uses WinFSP to support running on Windows. It implements +the filesystem operations using the S3 and Azure Storage REST APIs. + +Cloudfuse is stable, provided that it is used within its limits documented here. +Cloudfuse supports both reads and writes, however, it does not guarantee +continuous sync of data written to storage using other APIs or other mounts of +Cloudfuse. For data integrity it is recommended that multiple sources do not +modify the same blob/object/file. Please submit an issue +[here](https://github.com/Seagate/cloudfuse/issues) for any issues/feature +requests/questions. ## NOTICE @@ -22,55 +26,64 @@ Visit [this](https://github.com/Azure/azure-storage-fuse/wiki/Blobfuse2-Supporte ---> ## Features -- Mount an S3 bucket or Azure storage container or datalake file system on Linux and Windows. -- Basic file system operations such as mkdir, opendir, readdir, rmdir, open, read, create, write, close, unlink, - truncate, stat, rename +- Mount an S3 bucket or Azure storage container or datalake file system on Linux + and Windows. +- Basic file system operations such as mkdir, opendir, readdir, rmdir, open, + read, create, write, close, unlink, truncate, stat, rename - Local caching to improve subsequent access times -- Streaming to support reading AND writing large files +- Streaming to support reading AND writing large files - Parallel downloads and uploads to improve access time for large files - Multiple mounts to the same container for read-only workloads ## Health Monitor -Cloudfuse also supports a health monitor. 
It allows customers gain more insight into how their Cloudfuse instance is -behaving with the rest of their machine. Visit [here](tools/health-monitor/README.md) to set it up. +Cloudfuse also supports a health monitor. It allows customers gain more insight +into how their Cloudfuse instance is behaving with the rest of their machine. +Visit [here](tools/health-monitor/README.md) to set it up. ## Features compared to blobfuse2 -- Supports any S3 compatable storage +- Supports any S3 compatible storage - Adds a GUI to configure and start mounts - Runs on Windows using WinFSP in foreground or as a Windows service ## Download Cloudfuse -You can install Cloudfuse by cloning this repository. In the workspace execute the build script `./build.sh` to build -the binary. This will build a binary both for Linux or for Windows depending on the OS you are using. +You can install Cloudfuse by cloning this repository. In the workspace execute +the build script `./build.sh` to build the binary. This will build a binary both +for Linux or for Windows depending on the OS you are using. ### Linux -Cloudfuse currently only supports libfuse2. On Linux, you need to install the libfuse2 package, for example on Ubuntu: - +Cloudfuse currently only supports libfuse2. On Linux, you need to install the +libfuse2 package, for example on Ubuntu: + sudo apt install libfuse2 #### Running on Linux -To start your mount of an S3 Bucket or Azure Container use the `mount` command and specifiy the location of your config -file. See [config file](#Config-file) for information about the config file. By default, cloudfuse will run in the -background which allows you to close the terminal when you start a mount. If you would like it to run in the foreground -you can specificy `foreground: true` in your config file or pass `--foreground=true` as an argument when mounting. +To start your mount of an S3 Bucket or Azure Container use the `mount` command +and specify the location of your config file. See [config file](#config-file) +for information about the config file. By default, cloudfuse will run in the +background which allows you to close the terminal when you start a mount. If you +would like it to run in the foreground you can specify `foreground: true` in +your config file or pass `--foreground=true` as an argument when mounting. cloudfuse mount --config-file= ### Windows -On Windows, you also need to install the third party utility [WinFsp](https://winfsp.dev/). To download WinFsp, please -run the WinFsp installer found [here](https://winfsp.dev/rel/). +On Windows, you also need to install the third party utility +[WinFsp](https://winfsp.dev/). To download WinFsp, please run the WinFsp +installer found [here](https://winfsp.dev/rel/). See [here](WINDOWS.md) for how to setup Cloudfuse to run on Windows. ## Supported Operations -The general format of the Cloudfuse commands is `cloudfuse [command] [arguments] --[flag-name]=[flag-value]` +The general format of the Cloudfuse commands is `cloudfuse [command] [arguments] +--[flag-name]=[flag-value]` * `help` - Help about any command -* `mount` - Mounts an Azure container as a filesystem. The supported containers include +* `mount` - Mounts a cloud storage container as a filesystem. The supported + containers include - S3 Bucket - Azure Blob Container - Azure Datalake Gen2 Container -* `mount all` - Mounts all the containers in an S3 Account or Azure account as a filesystem. 
The supported storage - services include +* `mount all` - Mounts all the containers in an S3 Account or Azure account as a + filesystem. The supported storage services include - [S3 Storage](https://aws.amazon.com/s3/) - [Blob Storage](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction) - [Datalake Storage Gen2](https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction) @@ -83,8 +96,9 @@ The general format of the Cloudfuse commands is `cloudfuse [command] [arguments] * `unmount all` - Unmounts all Cloudfuse filesystems. ## Find help from your command prompt -To see a list of commands, type `cloudfuse -h` and then press the ENTER key. To learn about a specific command, just -include the name of the command (For example: `cloudfuse mount -h`). +To see a list of commands, type `cloudfuse -h` and then press the ENTER key. To +learn about a specific command, just include the name of the command (For +example: `cloudfuse mount -h`). ## Usage - Mount with cloudfuse @@ -96,178 +110,248 @@ include the name of the command (For example: `cloudfuse mount -h`). - Unmount cloudfuse on Linux * cloudfuse unmount \ - Unmount all cloudfuse instances on Linux - * cloudfuse unmount all + * cloudfuse unmount all - Install as a Windows service * cloudfuse service install -- Unistall cloudfuse from a Windows service +- Uninstall cloudfuse from a Windows service * cloudfuse service uninstall - Start the Windows service * cloudfuse service start - Stop the Windows service * cloudfuse service stop - Mount an instance that will persist in Windows when restarted - * cloufuse service mount \ --config-file=\ + * cloudfuse service mount \ --config-file=\ - Unmount mount of Cloudfuse running as a Windows service - * cloufuse service unmount \ + * cloudfuse service unmount \ ## CLI parameters -- Note: Cloudfuse accepts all CLI parameters that Blobfuse does, but may ignore parameters that are no longer - applicable. - General options * `--config-file=`: The path to the config file. * `--log-level=`: The level of logs to capture. * `--log-file-path=`: The path for the log file. * `--foreground=true`: Mounts the system in foreground mode. * `--read-only=true`: Mount container in read-only mode. - * `--default-working-dir`: The default working directory to store log files and other cloudfuse related information. + * `--default-working-dir`: The default working directory to store log files + and other cloudfuse related information. * `--disable-version-check=true`: Disable the cloudfuse version check. - * `--secure-config=true` : Config file is encrypted suing 'cloudfuse secure` command. + * `--secure-config=true` : Config file is encrypted suing 'cloudfuse secure` + command. * `--passphrase=` : Passphrase used to encrypt/decrypt config file. - * `--wait-for-mount=` : Let parent process wait for given timeout before exit to ensure child - has started. + * `--wait-for-mount=` : Let parent process wait for + given timeout before exit to ensure child has started. - Attribute cache options - * `--attr-cache-timeout=`: The timeout for the attribute cache entries. + * `--attr-cache-timeout=`: The timeout for the attribute + cache entries. * `--no-symlinks=true`: To improve performance disable symlink support. - Storage options * `--container-name=`: The container to mount. - * `--cancel-list-on-mount-seconds=`: Time for which list calls will be blocked after mount. 
- (prevent billing charges on mounting) - * `--virtual-directory=true` : Support virtual directories without existence of a special marker blob for block blob - account. - * `--subdirectory=` : Subdirectory to mount instead of entire container. - * `--disable-compression:false` : Disable content encoding negotiation with server. If blobs have 'content-encoding' - set to 'gzip' then turn on this flag. - * `--use-adls=false` : Specify configured storage account is HNS enabled or not. This must be turned on when HNS - enabled account is mounted. + * `--cancel-list-on-mount-seconds=`: Time for which list + calls will be blocked after mount. (prevent billing charges on mounting) + * `--virtual-directory=true` : Support virtual directories without existence + of a special marker blob for block blob account (Azure only). + * `--subdirectory=` : Subdirectory to mount instead of entire + container. + * `--disable-compression:false` : Disable content encoding negotiation with + server. If objects/blobs have 'content-encoding' set to 'gzip' then turn + on this flag. + * `--use-adls=false` : Specify configured storage account is HNS enabled or + not. This must be turned on when HNS enabled account is mounted. - File cache options - * `--file-cache-timeout=`: Timeout for which file is cached on local system. + * `--file-cache-timeout=`: Timeout for which file is + cached on local system. * `--tmp-path=`: The path to the file cache. - * `--cache-size-mb=`: Amount of disk cache that can be used by cloudfuse. - * `--high-disk-threshold=`: If local cache usage exceeds this, start early eviction of files from cache. - * `--low-disk-threshold=`: If local cache usage comes below this threshold then stop early eviction. - * `--sync-to-flush=false` : Sync call will force upload a file to storage container if this is set to true, - otherwise it just evicts file from local cache. + * `--cache-size-mb=`: Amount of disk cache that can be used by + cloudfuse. + * `--high-disk-threshold=`: If local cache usage exceeds this, + start early eviction of files from cache. + * `--low-disk-threshold=`: If local cache usage comes below this + threshold then stop early eviction. + * `--sync-to-flush=false` : Sync call will force upload a file to storage + container if this is set to true, otherwise it just evicts file from local + cache. - Stream options - * `--block-size-mb=`: Size of a block to be downloaded during streaming. + * `--block-size-mb=`: Size of a block to be downloaded during + streaming. - Block-Cache options - * `--block-cache-block-size=`: Size of a block to be downloaded as a unit. - * `--block-cache-pool-size=`: Size of pool to be used for caching. This limits total memory used by block-cache. - * `--block-cache-path=`: Path where downloaded blocks will be persisted. Not providing this parameter will disable the disk caching. + * `--block-cache-block-size=`: Size of a block to be downloaded + as a unit. + * `--block-cache-pool-size=`: Size of pool to be used for + caching. This limits total memory used by block-cache. + * `--block-cache-path=`: Path where downloaded blocks will be + persisted. Not providing this parameter will disable the disk caching. * `--block-cache-disk-size=`: Disk space to be used for caching. - * `--block-cache-prefetch=`: Number of blocks to prefetch at max when sequential reads are in progress. - * `--block-cache-prefetch-on-open=true`: Start prefetching on open system call instead of waiting for first read. Enhances perf if file is read sequentially from offset 0. 
+ * `--block-cache-prefetch=`: Number of blocks to prefetch + at max when sequential reads are in progress. + * `--block-cache-prefetch-on-open=true`: Start prefetching on open system + call instead of waiting for first read. Enhances perf if file is read + sequentially from offset 0. - Fuse options - * `--attr-timeout=`: Time the kernel can cache inode attributes. - * `--entry-timeout=`: Time the kernel can cache directory listing. - * `--negative-timeout=`: Time the kernel can cache non-existance of file or directory. + * `--attr-timeout=`: Time the kernel can cache inode + attributes. + * `--entry-timeout=`: Time the kernel can cache + directory listing. + * `--negative-timeout=`: Time the kernel can cache + non-existence of file or directory. * `--allow-other`: Allow other users to have access this mount point. - * `--disable-writeback-cache=true`: Disallow libfuse to buffer write requests if you must strictly open files in - O_WRONLY or O_APPEND mode. - * `--ignore-open-flags=true`: Ignore the append and write only flag since O_APPEND and O_WRONLY is not supported - with writeback caching. + * `--disable-writeback-cache=true`: Disallow libfuse to buffer write + requests if you must strictly open files in O_WRONLY or O_APPEND mode. + * `--ignore-open-flags=true`: Ignore the append and write only flag since + O_APPEND and O_WRONLY is not supported with writeback caching. +## S3 configuration +S3 connections will be configured by options in the following order of precedence: +- The s3storage section of the [Config file](#config-file) +- Environment variables + * `AWS_ACCESS_KEY_ID`: key ID, used as a pair with `AWS_SECRET_ACCESS_KEY` + * `AWS_SECRET_ACCESS_KEY`: secret key, used as a pair with + `AWS_ACCESS_KEY_ID` + * `AWS_SESSION_TOKEN`: validates a temporary key pair (key ID & secret key) + * `AWS_WEB_IDENTITY_TOKEN_FILE`: temporary credential from an external + identity provider + * `AWS_REGION`: the service region (e.g. us-east-1) + * `AWS_PROFILE`: the profile name to use from shared configuration + file(s) +- Shared configuration files (~/.aws/credentials and ~/.aws/config) + * The formatting for these files is documented at the link below. +For more information about environment variables and shared configuration files, +please see the documentation +[here](https://aws.github.io/aws-sdk-go-v2/docs/configuring-sdk/#specifying-credentials). -## Environment variables +## Azure storage configuration with environment variables - General options * `AZURE_STORAGE_ACCOUNT`: Specifies the storage account to be connected. * `AZURE_STORAGE_ACCOUNT_TYPE`: Specifies the account type 'block' or 'adls' - * `AZURE_STORAGE_ACCOUNT_CONTAINER`: Specifies the name of the container to be mounted - * `AZURE_STORAGE_BLOB_ENDPOINT`: Specifies the blob endpoint to use. Defaults to *.blob.core.windows.net, but is - useful for targeting storage emulators. - * `AZURE_STORAGE_AUTH_TYPE`: Overrides the currently specified auth type. Case insensitive. Options: Key, SAS, MSI, - SPN + * `AZURE_STORAGE_ACCOUNT_CONTAINER`: Specifies the name of the container to + be mounted + * `AZURE_STORAGE_BLOB_ENDPOINT`: Specifies the blob endpoint to use. + Defaults to *.blob.core.windows.net, but is useful for targeting storage + emulators. + * `AZURE_STORAGE_AUTH_TYPE`: Overrides the currently specified auth type. + Case insensitive. Options: Key, SAS, MSI, SPN - Account key auth: - * `AZURE_STORAGE_ACCESS_KEY`: Specifies the storage account key to use for authentication. 
+ * `AZURE_STORAGE_ACCESS_KEY`: Specifies the storage account key to use for + authentication. - SAS token auth: - * `AZURE_STORAGE_SAS_TOKEN`: Specifies the SAS token to use for authentication. + * `AZURE_STORAGE_SAS_TOKEN`: Specifies the SAS token to use for + authentication. - Managed Identity auth: - * `AZURE_STORAGE_IDENTITY_CLIENT_ID`: Only one of these three parameters are needed if multiple identities are - present on the system. - * `AZURE_STORAGE_IDENTITY_OBJECT_ID`: Only one of these three parameters are needed if multiple identities are - present on the system. - * `AZURE_STORAGE_IDENTITY_RESOURCE_ID`: Only one of these three parameters are needed if multiple identities are - present on the system. - * `MSI_ENDPOINT`: Specifies a custom managed identity endpoint, as IMDS may not be available under some scenarios. - Uses the `MSI_SECRET` parameter as the `Secret` header. - * `MSI_SECRET`: Specifies a custom secret for an alternate managed identity endpoint. + * `AZURE_STORAGE_IDENTITY_CLIENT_ID`: Only one of these three parameters are + needed if multiple identities are present on the system. + * `AZURE_STORAGE_IDENTITY_OBJECT_ID`: Only one of these three parameters are + needed if multiple identities are present on the system. + * `AZURE_STORAGE_IDENTITY_RESOURCE_ID`: Only one of these three parameters + are needed if multiple identities are present on the system. + * `MSI_ENDPOINT`: Specifies a custom managed identity endpoint, as IMDS may + not be available under some scenarios. Uses the `MSI_SECRET` parameter as + the `Secret` header. + * `MSI_SECRET`: Specifies a custom secret for an alternate managed identity + endpoint. - Service Principal Name auth: - * `AZURE_STORAGE_SPN_CLIENT_ID`: Specifies the client ID for your application registration - * `AZURE_STORAGE_SPN_TENANT_ID`: Specifies the tenant ID for your application registration - * `AZURE_STORAGE_AAD_ENDPOINT`: Specifies a custom AAD endpoint to authenticate against - * `AZURE_STORAGE_SPN_CLIENT_SECRET`: Specifies the client secret for your application registration. - * `AZURE_STORAGE_AUTH_RESOURCE` : Scope to be used while requesting for token. + * `AZURE_STORAGE_SPN_CLIENT_ID`: Specifies the client ID for your + application registration + * `AZURE_STORAGE_SPN_TENANT_ID`: Specifies the tenant ID for your + application registration + * `AZURE_STORAGE_AAD_ENDPOINT`: Specifies a custom AAD endpoint to + authenticate against + * `AZURE_STORAGE_SPN_CLIENT_SECRET`: Specifies the client secret for your + application registration. + * `AZURE_STORAGE_AUTH_RESOURCE` : Scope to be used while requesting for + token. - Proxy Server: * `http_proxy`: The proxy server address. Example: `10.1.22.4:8080`. - * `https_proxy`: The proxy server address when https is turned off forcing http. Example: `10.1.22.4:8080`. + * `https_proxy`: The proxy server address when https is turned off forcing + http. Example: `10.1.22.4:8080`. ## Config file - See [this](./sampleFileCacheConfig.yaml) sample config file. -- See [this](./setup/baseConfig.yaml) config file for a list and description of all possible configurable options in - cloudfuse. +- See [this](./setup/baseConfig.yaml) config file for a list and description of + all possible configurable options in cloudfuse. ***Please note: do not use quotations `""` for any of the config parameters*** ## Frequently Asked Questions -- How do I generate a SAS with permissions for rename? az cli has a command to generate a sas token. Open a command -prompt and make sure you are logged in to az cli. 
Run the following command and the sas token will be displayed in the -command prompt. az storage container generate-sas --account-name \ --account-key -\ -n \ --permissions dlrwac --start \ --expiry \ -- Why do I get EINVAL on opening a file with WRONLY or APPEND flags? To improve performance, Cloudfuse by default -enables writeback caching, which can produce unexpected behavior for files opened with WRONLY or APPEND flags, so -Cloudfuse returns EINVAL on open of a file with those flags. Either use disable-writeback-caching to turn off writeback -caching (can potentially result in degraded performance) or ignore-open-flags (replace WRONLY with RDWR and ignore -APPEND) based on your workload. -- How to mount Cloudfuse inside a container? Refer to 'docker' folder in this repo. It contains a sample 'Dockerfile'. -If you wish to create your own container image, try 'buildandruncontainer.sh' script, it will create a container image -and launch the container using current environment variables holding your storage account credentials. -- Why am I not able to see the updated contents of file(s), which were updated through means other than Cloudfuse mount? -If your use-case involves updating/uploading file(s) through other means and you wish to see the updated contents on -Cloudfuse mount then you need to disable kernel page-cache. `-o direct_io` CLI parameter is the option you need to use -while mounting. Along with this, set `file-cache-timeout=0` and all other libfuse caching parameters should also be set -to 0. User shall be aware that disabling kernel cache can result into more calls to S3 or Azure Storage which will have -cost and performance implications. +- How do I generate a SAS for Azure with permissions for rename? az cli has a + command to generate a sas token. Open a command prompt and make sure you are + logged in to az cli. Run the following command and the sas token will be + displayed in the command prompt. az storage container generate-sas + --account-name \ --account-key \ -n + \ --permissions dlrwac --start \ + --expiry \ +- Why do I get EINVAL on opening a file with WRONLY or APPEND flags? To improve + performance, Cloudfuse by default enables writeback caching, which can produce + unexpected behavior for files opened with WRONLY or APPEND flags, so Cloudfuse + returns EINVAL on open of a file with those flags. Either use + disable-writeback-caching to turn off writeback caching (can potentially + result in degraded performance) or ignore-open-flags (replace WRONLY with RDWR + and ignore APPEND) based on your workload. +- How to mount Cloudfuse inside a container? Refer to 'docker' folder in this + repo. It contains a sample 'Dockerfile'. If you wish to create your own + container image, try 'buildandruncontainer.sh' script, it will create a + container image and launch the container using current environment variables + holding your storage account credentials. +- Why am I not able to see the updated contents of file(s), which were updated + through means other than Cloudfuse mount? If your use-case involves + updating/uploading file(s) through other means and you wish to see the updated + contents on Cloudfuse mount then you need to disable kernel page-cache. `-o + direct_io` CLI parameter is the option you need to use while mounting. Along + with this, set `file-cache-timeout=0` and all other libfuse caching parameters + should also be set to 0. 
User shall be aware that disabling kernel cache can + result into more calls to S3 or Azure Storage which will have cost and + performance implications. ## Un-Supported File system operations -- mkfifo : fifo creation is not supported by cloudfuse and this will result in "function not implemented" error -- chown : Change of ownership is not supported by Azure Storage hence Cloudfuse does not support this. +- mkfifo : fifo creation is not supported by cloudfuse and this will result in + "function not implemented" error +- chown : Change of ownership is not supported by Azure Storage hence Cloudfuse + does not support this. - Creation of device files or pipes is not supported by Cloudfuse. - Cloudfuse does not support extended-attributes (x-attrs) operations - Cloudfuse does not support lseek() operation on directory handles. No error is thrown but it will not work as expected. ## Un-Supported Scenarios -- Cloudfuse does not support overlapping mount paths. While running multiple instances of Cloudfuse make sure each - instance has a unique and non-overlapping mount point. -- Cloudfuse does not support co-existance with NFS on same mount path. Behaviour in this case is undefined. -- For Azure block blob accounts, where data is uploaded through other means, Cloudfuse expects special directory marker - files to exist in container. In absence of this few file operations might not work. For e.g. if you have a blob - 'A/B/c.txt' then special marker files shall exists for 'A' and 'A/B', otherwise opening of 'A/B/c.txt' will fail. Once - a 'ls' operation is done on these directories 'A' and 'A/B' you will be able to open 'A/B/c.txt' as well. Possible - workaround to resolve this from your container is to either - - create the directory marker files manually through portal or run 'mkdir' command for 'A' and 'A/B' from cloudfuse. - Refer [me](https://github.com/Azure/azure-storage-fuse/issues/866) for details on this. +- Cloudfuse does not support overlapping mount paths. While running multiple + instances of Cloudfuse make sure each instance has a unique and + non-overlapping mount point. +- Cloudfuse does not support co-existence with NFS on same mount path. Behavior + in this case is undefined. +- For Azure block blob accounts, where data is uploaded through other means, + Cloudfuse expects special directory marker files to exist in container. In + absence of this few file operations might not work. For e.g. if you have a + blob 'A/B/c.txt' then special marker files shall exists for 'A' and 'A/B', + otherwise opening of 'A/B/c.txt' will fail. Once a 'ls' operation is done on + these directories 'A' and 'A/B' you will be able to open 'A/B/c.txt' as well. + Possible workaround to resolve this from your container is to either + + create the directory marker files manually through portal or run 'mkdir' + command for 'A' and 'A/B' from cloudfuse. Refer + [me](https://github.com/Azure/azure-storage-fuse/issues/866) for details on + this. ## Limitations -- In case of Azure BlockBlob accounts, ACLs are not supported by Azure Storage so Cloudfuse will by default return - success for 'chmod' operation. However it will work fine for Gen2 (DataLake) accounts. -- When Cloudfuse is mounted on a container, SYS_ADMIN privileges are required for it to interact with the fuse driver. - If container is created without the privilege, mount will fail. 
Sample command to spawn a docker container is - - `docker run -it --rm --cap-add=SYS_ADMIN --device=/dev/fuse --security-opt apparmor:unconfined ` - -### Syslog security warning -By default, Cloudfuse will log to syslog. The default settings will, in some cases, log relevant file paths to syslog. -If this is sensitive information, turn off logging or set log-level to LOG_ERR. +- In case of Azure BlockBlob accounts, ACLs are not supported by Azure Storage + so Cloudfuse will by default return success for 'chmod' operation. However it + will work fine for Gen2 (DataLake) accounts. ACLs are not currently supported + for S3 accounts. +- When Cloudfuse is mounted on a container, SYS_ADMIN privileges are required + for it to interact with the fuse driver. If container is created without the + privilege, mount will fail. Sample command to spawn a docker container is + `docker run -it --rm --cap-add=SYS_ADMIN --device=/dev/fuse --security-opt + apparmor:unconfined ` + +### Syslog security warning +By default, Cloudfuse will log to syslog. The default settings will, in some +cases, log relevant file paths to syslog. If this is sensitive information, turn +off logging or set log-level to LOG_ERR. ## License This project is licensed under MIT. - + ## Contributing This project welcomes contributions and suggestions. -This project is governed by the [code of conduct](CODE_OF_CONDUCT.md). You are expected to follow this as you contribute -to the project. Please report all unacceptable behavior to [opensource@seagate.com](mailto:opensource@seagate.com). \ No newline at end of file +This project is governed by the [code of conduct](CODE_OF_CONDUCT.md). You are +expected to follow this as you contribute to the project. Please report all +unacceptable behavior to +[opensource@seagate.com](mailto:opensource@seagate.com). 
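The component/s3storage/client.go changes that follow also rework how the region and endpoint are resolved before the client is built. As a reduced, standalone sketch of that pattern, assuming aws-sdk-go-v2's EndpointResolverWithOptionsFunc and listing only the Lyve Cloud regions visible in the hunk (the wrapper function and trimmed region table below are illustrative, not the patch itself):

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/service/s3"
)

// lyveEndpointResolver is an illustrative stand-in for the resolver built in
// Configure: fall back to a default region when none is known, then map the
// region to an endpoint URL only if no explicit endpoint was configured.
func lyveEndpointResolver(configuredEndpoint string) aws.EndpointResolverWithOptionsFunc {
	// partial table, for illustration only
	urls := map[string]string{
		"us-east-1":    "https://s3.us-east-1.lyvecloud.seagate.com",
		"us-central-2": "https://s3.us-central-2.lyvecloud.seagate.com",
	}
	return func(service, region string, options ...interface{}) (aws.Endpoint, error) {
		if service != s3.ServiceID {
			return aws.Endpoint{}, fmt.Errorf("unknown endpoint requested")
		}
		if region == "" {
			region = "us-east-1"
		}
		url := configuredEndpoint
		if url == "" {
			var ok bool
			if url, ok = urls[region]; !ok {
				return aws.Endpoint{}, fmt.Errorf("unrecognized region %q", region)
			}
		}
		return aws.Endpoint{PartitionID: "aws", URL: url, SigningRegion: region}, nil
	}
}

func main() {
	cfg, err := config.LoadDefaultConfig(context.TODO(),
		config.WithEndpointResolverWithOptions(lyveEndpointResolver("")))
	if err != nil {
		log.Fatal(err)
	}
	_ = s3.NewFromConfig(cfg)
}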
diff --git a/component/s3storage/client.go b/component/s3storage/client.go index 47cfdf601..b6626476e 100644 --- a/component/s3storage/client.go +++ b/component/s3storage/client.go @@ -80,16 +80,15 @@ func (cl *Client) Configure(cfg Config) error { // Set the endpoint supplied in the config file endpointResolver := aws.EndpointResolverWithOptionsFunc(func(service, region string, options ...interface{}) (aws.Endpoint, error) { if service == s3.ServiceID { - // figure out the region + // resolve region if cl.Config.authConfig.Region == "" && region == "" { region = "us-east-1" + // write region back to config struct + cl.Config.authConfig.Region = region } - // figure out the endpoint URL - var url string - if cl.Config.authConfig.Endpoint != "" { - url = cl.Config.authConfig.Endpoint - } else { - // TODO: default to another S3 provider + // resolve endpoint URL + if cl.Config.authConfig.Endpoint == "" { + var url string switch region { case "us-east-1": url = "https://s3.us-east-1.lyvecloud.seagate.com" @@ -104,43 +103,51 @@ func (cl *Client) Configure(cfg Config) error { case "us-central-2": url = "https://s3.us-central-2.lyvecloud.seagate.com" default: - return aws.Endpoint{}, fmt.Errorf("unrecognized region \"%s\"", region) + errMsg := fmt.Sprintf("unrecognized region \"%s\"", region) + log.Err("Client::Configure : %s", errMsg) + return aws.Endpoint{}, fmt.Errorf("%s", errMsg) } - // save the results back to the config + // on success, write back to config struct cl.Config.authConfig.Region = region cl.Config.authConfig.Endpoint = url } // create the endpoint return aws.Endpoint{ PartitionID: "aws", - URL: url, + URL: cl.Config.authConfig.Endpoint, SigningRegion: cl.Config.authConfig.Region, }, nil } return aws.Endpoint{}, fmt.Errorf("unknown endpoint requested") }) - // TODO: check if the config is missing credentials - // and allow the default config to find them in the environment on its own - staticProvider := credentials.NewStaticCredentialsProvider( - cl.Config.authConfig.KeyID, - cl.Config.authConfig.SecretKey, - "", - ) + var credentialsProvider aws.CredentialsProvider + credentialsInConfig := cl.Config.authConfig.KeyID != "" && cl.Config.authConfig.SecretKey != "" + if credentialsInConfig { + credentialsProvider = credentials.NewStaticCredentialsProvider( + cl.Config.authConfig.KeyID, + cl.Config.authConfig.SecretKey, + "", + ) + } defaultConfig, err := config.LoadDefaultConfig( context.TODO(), - config.WithCredentialsProvider(staticProvider), + config.WithSharedConfigProfile(cl.Config.authConfig.Profile), + config.WithCredentialsProvider(credentialsProvider), config.WithEndpointResolverWithOptions(endpointResolver), ) if err != nil { log.Err("Client::Configure : config.LoadDefaultConfig() failed. Here's why: %v", err) return err } - // Create an Amazon S3 service client cl.awsS3Client = s3.NewFromConfig(defaultConfig) - - return nil + // ListBuckets here to test connection + _, err = cl.ListBuckets() + if err != nil { + log.Err("Client::Configure : listing buckets failed. Here's why: %v", err) + } + return err } // For dynamic configuration, update the config here. 
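One user-visible effect of the Configure change above: the client now issues a ListBuckets call as soon as it is configured, so credential, profile, or endpoint problems can surface at configure time (for example, when mounting) rather than on the first file operation. A caller-side sketch of the same idea (the verifyConnection helper below is illustrative, not the component's actual wiring):

package main

import (
	"context"
	"fmt"
	"log"
	"time"

	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/service/s3"
)

// verifyConnection makes one cheap call after the client is built so that
// credential or endpoint problems are reported immediately instead of later.
func verifyConnection(ctx context.Context, client *s3.Client) error {
	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
	defer cancel()
	if _, err := client.ListBuckets(ctx, &s3.ListBucketsInput{}); err != nil {
		return fmt.Errorf("listing buckets failed: %w", err)
	}
	return nil
}

func main() {
	cfg, err := config.LoadDefaultConfig(context.Background())
	if err != nil {
		log.Fatal(err)
	}
	// with this change, an error like this is propagated to the caller
	// instead of being ignored
	if err := verifyConnection(context.Background(), s3.NewFromConfig(cfg)); err != nil {
		log.Fatal(err)
	}
}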
diff --git a/component/s3storage/client_test.go b/component/s3storage/client_test.go index 69634c2c9..3daba8ed6 100644 --- a/component/s3storage/client_test.go +++ b/component/s3storage/client_test.go @@ -89,6 +89,7 @@ func newTestClient(configuration string) (*Client, error) { KeyID: conf.KeyID, SecretKey: conf.SecretKey, Region: conf.Region, + Profile: conf.Profile, Endpoint: conf.Endpoint, }, prefixPath: conf.PrefixPath, @@ -97,7 +98,7 @@ func newTestClient(configuration string) (*Client, error) { uploadCutoff: conf.UploadCutoffMb * common.MbToBytes, } // create a Client - client := NewConnection(configForS3Client) + client, err := NewConnection(configForS3Client) return client.(*Client), err } @@ -134,7 +135,8 @@ func (s *clientTestSuite) SetupTest() { s.setupTestHelper("", true) } -func (s *clientTestSuite) setupTestHelper(configuration string, create bool) { +func (s *clientTestSuite) setupTestHelper(configuration string, create bool) error { + // TODO: actually create a test bucket for testing (flagged with the create parameter) if storageTestConfigurationParameters.PartSizeMb == 0 { storageTestConfigurationParameters.PartSizeMb = 5 } @@ -151,8 +153,10 @@ func (s *clientTestSuite) setupTestHelper(configuration string, create bool) { s.assert = assert.New(s.T()) - s.client, _ = newTestClient(configuration) + var err error + s.client, err = newTestClient(configuration) s.awsS3Client = s.client.awsS3Client + return err } // TODO: do we need s3StatsCollector for this test suite? @@ -164,6 +168,42 @@ func (s *clientTestSuite) cleanupTest() { // s.tearDownTestHelper(true) _ = log.Destroy() } + +func (s *clientTestSuite) TestCredentialsError() { + defer s.cleanupTest() + // setup + config := fmt.Sprintf("s3storage:\n bucket-name: %s\n key-id: %s\n secret-key: %s", + storageTestConfigurationParameters.BucketName, storageTestConfigurationParameters.KeyID, + "WRONGSECRETKEY") + // S3 connection creation should fail + err := s.setupTestHelper(config, false) + s.assert.NotNil(err) +} + +func (s *clientTestSuite) TestEnvVarCredentials() { + defer s.cleanupTest() + // setup + os.Setenv("AWS_ACCESS_KEY_ID", storageTestConfigurationParameters.KeyID) + os.Setenv("AWS_SECRET_ACCESS_KEY", storageTestConfigurationParameters.SecretKey) + config := fmt.Sprintf("s3storage:\n bucket-name: %s", "stxe1-srg-lens-lab1") + // S3 connection should find credentials from environment variables + err := s.setupTestHelper(config, false) + s.assert.Nil(err) +} + +func (s *clientTestSuite) TestCredentialPrecedence() { + defer s.cleanupTest() + // setup + os.Setenv("AWS_ACCESS_KEY_ID", storageTestConfigurationParameters.KeyID) + os.Setenv("AWS_SECRET_ACCESS_KEY", storageTestConfigurationParameters.SecretKey) + config := fmt.Sprintf("s3storage:\n bucket-name: %s\n key-id: %s\n secret-key: %s", + storageTestConfigurationParameters.BucketName, storageTestConfigurationParameters.KeyID, + "WRONGSECRETKEY") + // Wrong credentials should take precedence, so S3 connection should fail + err := s.setupTestHelper(config, false) + s.assert.NotNil(err) +} + func (s *clientTestSuite) TestListBuckets() { defer s.cleanupTest() // TODO: generalize this test by creating, listing, then destroying a bucket @@ -657,7 +697,7 @@ func (s *clientTestSuite) TestReadToFileRanged() { func (s *clientTestSuite) TestReadToFileNoMultipart() { storageTestConfigurationParameters.DisableConcurrentDownload = true vdConfig := generateConfigYaml(storageTestConfigurationParameters) - s.setupTestHelper(vdConfig, true) + s.setupTestHelper(vdConfig, false) 
defer s.cleanupTest() // setup name := generateFileName() diff --git a/component/s3storage/config.go b/component/s3storage/config.go index 758c2e111..48a16fb08 100644 --- a/component/s3storage/config.go +++ b/component/s3storage/config.go @@ -51,6 +51,7 @@ type Options struct { KeyID string `config:"key-id" yaml:"key-id,omitempty"` SecretKey string `config:"secret-key" yaml:"secret-key,omitempty"` Region string `config:"region" yaml:"region,omitempty"` + Profile string `config:"profile" yaml:"profile,omitempty"` Endpoint string `config:"endpoint" yaml:"endpoint,omitempty"` PrefixPath string `config:"subdirectory" yaml:"subdirectory,omitempty"` RestrictedCharsWin bool `config:"restricted-characters-windows" yaml:"-"` @@ -69,21 +70,12 @@ func ParseAndValidateConfig(s3 *S3Storage, opt Options) error { return fmt.Errorf("%w: bucket name not provided", errConfigFieldEmpty) } - // Validate key id - if opt.KeyID == "" { - return fmt.Errorf("%w: key id not provided", errConfigFieldEmpty) - } - - // Validate secret key - if opt.SecretKey == "" { - return fmt.Errorf("%w: bucket name not provided", errConfigFieldEmpty) - } - // Set authentication config s3.stConfig.authConfig.BucketName = opt.BucketName s3.stConfig.authConfig.KeyID = opt.KeyID s3.stConfig.authConfig.SecretKey = opt.SecretKey s3.stConfig.authConfig.Region = opt.Region + s3.stConfig.authConfig.Profile = opt.Profile s3.stConfig.authConfig.Endpoint = opt.Endpoint // Set restricted characters diff --git a/component/s3storage/config_test.go b/component/s3storage/config_test.go index ecdcdba07..1dad3212a 100644 --- a/component/s3storage/config_test.go +++ b/component/s3storage/config_test.go @@ -66,6 +66,7 @@ func (s *configTestSuite) SetupTest() { KeyID: "testKeyId", SecretKey: "testSecretKey", Region: "testRegion", + Profile: "testProfile", Endpoint: "testEndpoint", RestrictedCharsWin: true, PrefixPath: "testPrefixPath", @@ -84,23 +85,7 @@ func (s *configTestSuite) TestEmptyBucketName() { s.assert.ErrorIs(err, errConfigFieldEmpty) } -func (s *configTestSuite) TestEmptyKeyID() { - // When - s.opt.KeyID = "" - - // Then - err := ParseAndValidateConfig(s.s3, s.opt) - s.assert.ErrorIs(err, errConfigFieldEmpty) -} - -func (s *configTestSuite) TestEmptySecretKey() { - // When - s.opt.SecretKey = "" - - // Then - err := ParseAndValidateConfig(s.s3, s.opt) - s.assert.ErrorIs(err, errConfigFieldEmpty) -} +// TODO: make errors from the default aws credentials provider visible to the user somehow func (s *configTestSuite) TestConfigParse() { // When @@ -112,6 +97,7 @@ s.assert.Equal(s.opt.KeyID, s.s3.stConfig.authConfig.KeyID) s.assert.Equal(s.opt.SecretKey, s.s3.stConfig.authConfig.SecretKey) s.assert.Equal(s.opt.Region, s.s3.stConfig.authConfig.Region) + s.assert.Equal(s.opt.Profile, s.s3.stConfig.authConfig.Profile) s.assert.Equal(s.opt.Endpoint, s.s3.stConfig.authConfig.Endpoint) s.assert.Equal(s.opt.RestrictedCharsWin, s.s3.stConfig.restrictedCharsWin) s.assert.Equal(s.opt.PrefixPath, s.s3.stConfig.prefixPath) diff --git a/component/s3storage/connection.go b/component/s3storage/connection.go index 5b80bc9fe..e1d1f836d 100644 --- a/component/s3storage/connection.go +++ b/component/s3storage/connection.go @@ -66,15 +66,16 @@ type s3AuthConfig struct { BucketName string KeyID string SecretKey string - Endpoint string Region string + Profile string + Endpoint string } // NewConnection : Create S3Connection Object -func NewConnection(cfg Config) S3Connection { +func NewConnection(cfg Config)
(S3Connection, error) { stg := &Client{} - _ = stg.Configure(cfg) - return stg + err := stg.Configure(cfg) + return stg, err } type S3Connection interface { diff --git a/component/s3storage/s3storage.go b/component/s3storage/s3storage.go index 5ea9e031b..a997130d8 100644 --- a/component/s3storage/s3storage.go +++ b/component/s3storage/s3storage.go @@ -136,8 +136,9 @@ func (s3 *S3Storage) OnConfigChange() { } func (s3 *S3Storage) configureAndTest(isParent bool) error { - s3.storage = NewConnection(s3.stConfig) - return nil + var err error + s3.storage, err = NewConnection(s3.stConfig) + return err } // Start : Initialize the go-sdk pipeline here and test auth is working fine diff --git a/component/s3storage/s3storage_test.go b/component/s3storage/s3storage_test.go index c69955164..d8cbc3ef4 100644 --- a/component/s3storage/s3storage_test.go +++ b/component/s3storage/s3storage_test.go @@ -95,8 +95,9 @@ type storageTestConfiguration struct { BucketName string `json:"bucket-name"` KeyID string `json:"access-key"` SecretKey string `json:"secret-key"` - Endpoint string `json:"endpoint"` Region string `json:"region"` + Profile string `json:"profile"` + Endpoint string `json:"endpoint"` Prefix string `json:"prefix"` RestrictedCharsWin bool `json:"restricted-characters-windows"` PartSizeMb int64 `json:"part-size-mb"` @@ -332,10 +333,10 @@ func (s *s3StorageTestSuite) setupTestHelper(configuration string, bucket string func generateConfigYaml(testParams storageTestConfiguration) string { return fmt.Sprintf("s3storage:\n bucket-name: %s\n key-id: %s\n secret-key: %s\n"+ - " endpoint: %s\n region: %s\n subdirectory: %s\n restricted-characters-windows: %t\n part-size-mb: %d\n"+ - " upload-cutoff-mb: %d\n disable-concurrent-download: %t", + " region: %s\n profile: %s\n endpoint: %s\n subdirectory: %s\n restricted-characters-windows: %t\n"+ + " part-size-mb: %d\n upload-cutoff-mb: %d\n disable-concurrent-download: %t", testParams.BucketName, testParams.KeyID, testParams.SecretKey, - testParams.Endpoint, testParams.Region, testParams.Prefix, testParams.RestrictedCharsWin, testParams.PartSizeMb, + testParams.Region, testParams.Profile, testParams.Endpoint, testParams.Prefix, testParams.RestrictedCharsWin, testParams.PartSizeMb, testParams.UploadCutoffMb, testParams.DisableConcurrentDownload) } @@ -362,9 +363,8 @@ func (s *s3StorageTestSuite) cleanupTest() { func (s *s3StorageTestSuite) TestDefault() { defer s.cleanupTest() + // only test required parameters s.assert.Equal(storageTestConfigurationParameters.BucketName, s.s3Storage.stConfig.authConfig.BucketName) - s.assert.Equal(storageTestConfigurationParameters.KeyID, s.s3Storage.stConfig.authConfig.KeyID) - s.assert.Equal(storageTestConfigurationParameters.SecretKey, s.s3Storage.stConfig.authConfig.SecretKey) // TODO: Uncomment the following line when we have our own bucket and can remove the default test prefix path // s.assert.Empty(s.s3Storage.stConfig.prefixPath) s.assert.False(s.s3Storage.stConfig.restrictedCharsWin) diff --git a/component/s3storage/s3wrappers_test.go b/component/s3storage/s3wrappers_test.go index 888388418..6bd3e26ea 100644 --- a/component/s3storage/s3wrappers_test.go +++ b/component/s3storage/s3wrappers_test.go @@ -90,9 +90,11 @@ func (s *s3wrapperTestSuite) SetupTest() { func (s *s3wrapperTestSuite) setupTestHelper(configuration string) { if configuration == "" { - configuration = fmt.Sprintf("s3storage:\n bucket-name: %s\n key-id: %s\n secret-key: %s\n endpoint: %s\n region: %s", + configuration = fmt.Sprintf( + "s3storage:\n 
bucket-name: %s\n key-id: %s\n secret-key: %s\n region: %s\n profile: %s\n endpoint: %s", storageTestConfigurationParameters.BucketName, storageTestConfigurationParameters.KeyID, - storageTestConfigurationParameters.SecretKey, storageTestConfigurationParameters.Endpoint, storageTestConfigurationParameters.Region) + storageTestConfigurationParameters.SecretKey, storageTestConfigurationParameters.Region, + storageTestConfigurationParameters.Profile, storageTestConfigurationParameters.Endpoint) } s.config = configuration diff --git a/setup/baseConfig.yaml b/setup/baseConfig.yaml index 2a42c8267..e1d19b08a 100644 --- a/setup/baseConfig.yaml +++ b/setup/baseConfig.yaml @@ -16,7 +16,7 @@ # 6. By default 'writeback-cache' is enabled for libfuse3 and this may result in append/write operations to fail. # Either you can disable 'writeback-cache', which might hurt the performance # or you can configure cloudfuse to ignore open flags given by user and make it work with ''writeback-cache'. -# 'libfuse' sectoin below has both the configurations. +# 'libfuse' section below has both the configurations. # 7. If are you using 'allow-other: true' config then make sure user_allow_other is enabled in /etc/fuse.conf file as # well otherwise mount will fail. By default /etc/fuse.conf will have this option disabled we just need to # enable it and save the file. @@ -173,10 +173,11 @@ azstorage: s3storage: # Required bucket-name: - key-id: - secret-key: # Optional + key-id: + secret-key: region: + profile: endpoint: subdirectory: part-size-mb: