Skip to content

Commit

Permalink
Preserve ACL/Permissions while uploading file over datalake (#1571)
Browse files Browse the repository at this point in the history
* Add code to reset the ACLs post file upload in adls
  • Loading branch information
vibhansa-msft authored Nov 25, 2024
1 parent 0ab1a64 commit b07781c
Show file tree
Hide file tree
Showing 7 changed files with 171 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- Rename file was calling an additional getProperties call.
- Delete empty directories from local cache on rmdir operation.
- [#1547](https://github.com/Azure/azure-storage-fuse/issues/1547) Truncate logic of file cache is modified to prevent downloading and uploading the entire file.
- Updating a file via Blobfuse2 was resetting the ACLs and permissions applied to the file in Datalake.

**Features**
- Added 'gen-config' command to auto generate blobfuse2 config file.
Expand Down
3 changes: 3 additions & 0 deletions component/azstorage/azstorage.go
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,9 @@ func init() {
cpkEnabled := config.AddBoolFlag("cpk-enabled", false, "Enable client provided key.")
config.BindPFlag(compName+".cpk-enabled", cpkEnabled)

preserveACL := config.AddBoolFlag("preserve-acl", false, "Preserve ACL and Permissions set on file during updates")
config.BindPFlag(compName+".preserve-acl", preserveACL)

config.RegisterFlagCompletionFunc("container-name", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
return nil, cobra.ShellCompDirectiveNoFileComp
})
Expand Down
7 changes: 5 additions & 2 deletions component/azstorage/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ type AzStorageOptions struct {
CPKEnabled bool `config:"cpk-enabled" yaml:"cpk-enabled"`
CPKEncryptionKey string `config:"cpk-encryption-key" yaml:"cpk-encryption-key"`
CPKEncryptionKeySha256 string `config:"cpk-encryption-key-sha256" yaml:"cpk-encryption-key-sha256"`
PreserveACL bool `config:"preserve-acl" yaml:"preserve-acl"`

// v1 support
UseAdls bool `config:"use-adls" yaml:"-"`
Expand Down Expand Up @@ -498,12 +499,14 @@ func ParseAndValidateConfig(az *AzStorage, opt AzStorageOptions) error {
log.Warn("unsupported v1 CLI parameter: debug-libcurl is not applicable in blobfuse2.")
}

az.stConfig.preserveACL = opt.PreserveACL

log.Crit("ParseAndValidateConfig : account %s, container %s, account-type %s, auth %s, prefix %s, endpoint %s, MD5 %v %v, virtual-directory %v, disable-compression %v, CPK %v",
az.stConfig.authConfig.AccountName, az.stConfig.container, az.stConfig.authConfig.AccountType, az.stConfig.authConfig.AuthMode,
az.stConfig.prefixPath, az.stConfig.authConfig.Endpoint, az.stConfig.validateMD5, az.stConfig.updateMD5, az.stConfig.virtualDirectory, az.stConfig.disableCompression, az.stConfig.cpkEnabled)
log.Crit("ParseAndValidateConfig : use-HTTP %t, block-size %d, max-concurrency %d, default-tier %s, fail-unsupported-op %t, mount-all-containers %t", az.stConfig.authConfig.UseHTTP, az.stConfig.blockSize, az.stConfig.maxConcurrency, az.stConfig.defaultTier, az.stConfig.ignoreAccessModifiers, az.stConfig.mountAllContainers)
log.Crit("ParseAndValidateConfig : Retry Config: retry-count %d, max-timeout %d, backoff-time %d, max-delay %d",
az.stConfig.maxRetries, az.stConfig.maxTimeout, az.stConfig.backoffTime, az.stConfig.maxRetryDelay)
log.Crit("ParseAndValidateConfig : Retry Config: retry-count %d, max-timeout %d, backoff-time %d, max-delay %d, preserve-acl: %v",
az.stConfig.maxRetries, az.stConfig.maxTimeout, az.stConfig.backoffTime, az.stConfig.maxRetryDelay, az.stConfig.preserveACL)

log.Crit("ParseAndValidateConfig : Telemetry : %s, honour-ACL %v, disable-symlink %v", az.stConfig.telemetry, az.stConfig.honourACL, az.stConfig.disableSymlink)

Expand Down
1 change: 1 addition & 0 deletions component/azstorage/connection.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ type AzStorageConfig struct {
telemetry string
honourACL bool
disableSymlink bool
preserveACL bool

// CPK related config
cpkEnabled bool
Expand Down
37 changes: 35 additions & 2 deletions component/azstorage/datalake.go
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,40 @@ func (dl *Datalake) ReadInBuffer(name string, offset int64, len int64, data []by

// WriteFromFile : Upload local file to file
//
// A file in DataLake may have permissions and an ACL set, and just uploading the file will
// override them. When the preserve-acl option is enabled the current ACL is read before the
// upload and written back afterwards. Failures while reading or restoring the ACL are only
// logged; the value returned is always the result of the upload itself.
func (dl *Datalake) WriteFromFile(name string, metadata map[string]*string, fi *os.File) (err error) {
	var (
		acl        string
		fileClient *file.Client
	)

	if dl.Config.preserveACL {
		fileClient = dl.Filesystem.NewFileClient(filepath.Join(dl.Config.prefixPath, name))
		// Use a dedicated error variable so the named return value is not shadowed.
		resp, aclErr := fileClient.GetAccessControl(context.Background(), nil)
		if aclErr != nil {
			log.Err("Datalake::WriteFromFile : Failed to get ACLs for file %s [%s]", name, aclErr.Error())
		} else if resp.ACL != nil {
			acl = *resp.ACL
		}
	}

	// Upload the file, which will override the permissions and ACL
	err = dl.BlockBlob.WriteFromFile(name, metadata, fi)

	// acl is non-empty only when preserve-acl is on and the ACL was fetched successfully,
	// which also guarantees fileClient is non-nil here.
	if acl != "" {
		// Cannot set both permissions and ACL in one call. ACL includes permission as well so just setting those back
		// Just setting up the permissions will delete existing ACLs applied on the blob so do not convert this code to
		// just set the permissions.
		_, aclErr := fileClient.SetAccessControl(context.Background(), &file.SetAccessControlOptions{
			ACL: &acl,
		})
		if aclErr != nil {
			// Earlier code was ignoring this so it might break customer cases where they do not have auth to update ACL
			log.Err("Datalake::WriteFromFile : Failed to set ACL for %s [%s]", name, aclErr.Error())
		}
	}

	return err
}

// WriteFromBuffer : Upload from a buffer to a file
Expand Down Expand Up @@ -588,7 +621,7 @@ func (dl *Datalake) ChangeMod(name string, mode os.FileMode) error {
// and create new string with the username included in the string
// Keeping this code here so in future if its required we can get the string and manipulate
currPerm, err := fileURL.GetAccessControl(context.Background())
currPerm, err := fileURL.getACL(context.Background())
e := storeDatalakeErrToErr(err)
if e == ErrFileNotFound {
return syscall.ENOENT
Expand Down
125 changes: 125 additions & 0 deletions component/azstorage/datalake_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,15 @@ package azstorage
import (
"bytes"
"container/list"
"context"
"crypto/rand"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"strings"
"syscall"
"testing"
Expand Down Expand Up @@ -2568,6 +2571,128 @@ func (s *datalakeTestSuite) TestUploadWithCPKEnabled() {
_ = os.Remove(name1)
}

// getACL returns the ACL string reported by the service for the file called name, resolved
// under the configured prefix path. An empty string is returned both when the lookup fails
// (together with the error) and when the response carries no ACL (with a nil error).
func getACL(dl *Datalake, name string) (string, error) {
	target := filepath.Join(dl.Config.prefixPath, name)
	resp, err := dl.Filesystem.NewFileClient(target).GetAccessControl(context.Background(), nil)
	if err != nil {
		return "", err
	}

	if resp.ACL == nil {
		return "", nil
	}

	return *resp.ACL, nil
}

// createFileWithData creates the file name through the component under test, writes data at
// offset 0, applies mode via Chmod and finally closes the handle. The error from every step
// is asserted to be nil.
func (s *datalakeTestSuite) createFileWithData(name string, data []byte, mode os.FileMode) {
	h, err := s.az.CreateFile(internal.CreateFileOptions{Name: name})
	s.assert.Nil(err)

	_, err = s.az.WriteFile(internal.WriteFileOptions{Handle: h, Offset: 0, Data: data})
	s.assert.Nil(err)

	err = s.az.Chmod(internal.ChmodOptions{Name: name, Mode: mode})
	s.assert.Nil(err)

	// Capture the close result so the assertion below checks this call and not the stale Chmod error.
	err = s.az.CloseFile(internal.CloseFileOptions{Handle: h})
	s.assert.Nil(err)
}

// TestPermissionPreservationWithoutFlag uploads over an existing file that was chmod-ed to
// 0764 using the suite's default configuration (no preserve-acl override in this test) and
// asserts that the mode is no longer 0764 and that the ACL reads rw-/r--/--- afterwards.
func (s *datalakeTestSuite) TestPermissionPreservationWithoutFlag() {
	defer s.cleanupTest()
	name := generateFileName()
	localName := name + "_local"

	data := []byte("test data")
	mode := fs.FileMode(0764)
	s.createFileWithData(name, data, mode)

	// Simulate file copy and permission checks
	err := os.WriteFile(localName, []byte("123123"), mode)
	s.assert.Nil(err)
	// Deferred so the local file is removed even if a later step panics.
	defer os.Remove(localName)

	f, err := os.OpenFile(localName, os.O_RDWR, mode)
	s.assert.Nil(err)
	// Release the descriptor; runs before the deferred Remove above.
	defer f.Close()

	err = s.az.CopyFromFile(internal.CopyFromFileOptions{Name: name, File: f, Metadata: nil})
	s.assert.Nil(err)

	attr, err := s.az.GetAttr(internal.GetAttrOptions{Name: name})
	s.assert.Nil(err)
	s.assert.NotNil(attr)
	s.assert.NotEqual(os.FileMode(0764), attr.Mode)

	acl, err := getACL(s.az.storage.(*Datalake), name)
	s.assert.Nil(err)
	s.assert.Contains(acl, "user::rw-")
	s.assert.Contains(acl, "group::r--")
	s.assert.Contains(acl, "other::---")
}

// TestPermissionPreservationWithFlag re-initialises the component with preserve-acl: true,
// uploads over an existing file that was chmod-ed to 0764 and asserts that both the mode and
// the rwx/rw-/r-- ACL entries are still in place after the upload.
func (s *datalakeTestSuite) TestPermissionPreservationWithFlag() {
	defer s.cleanupTest()
	// Setup
	conf := fmt.Sprintf("azstorage:\n preserve-acl: true\n account-name: %s\n endpoint: https://%s.dfs.core.windows.net/\n type: adls\n account-key: %s\n mode: key\n container: %s\n fail-unsupported-op: true",
		storageTestConfigurationParameters.AdlsAccount, storageTestConfigurationParameters.AdlsAccount, storageTestConfigurationParameters.AdlsKey, s.container)
	s.setupTestHelper(conf, s.container, false)

	name := generateFileName()
	localName := name + "_local"
	data := []byte("test data")
	mode := fs.FileMode(0764)
	s.createFileWithData(name, data, mode)

	// Simulate file copy and permission checks
	err := os.WriteFile(localName, []byte("123123"), mode)
	s.assert.Nil(err)
	// Deferred so the local file is removed even if a later step panics.
	defer os.Remove(localName)

	f, err := os.OpenFile(localName, os.O_RDWR, mode)
	s.assert.Nil(err)
	// Release the descriptor; runs before the deferred Remove above.
	defer f.Close()

	err = s.az.CopyFromFile(internal.CopyFromFileOptions{Name: name, File: f, Metadata: nil})
	s.assert.Nil(err)

	attr, err := s.az.GetAttr(internal.GetAttrOptions{Name: name})
	s.assert.Nil(err)
	s.assert.NotNil(attr)
	s.assert.Equal(os.FileMode(0764), attr.Mode)

	acl, err := getACL(s.az.storage.(*Datalake), name)
	s.assert.Nil(err)
	s.assert.Contains(acl, "user::rwx")
	s.assert.Contains(acl, "group::rw-")
	s.assert.Contains(acl, "other::r--")
}

// TestPermissionPreservationWithCommit stages a single block for a file that was chmod-ed to
// 0767, commits it, and then asserts that the reported mode is still 0767 and that the ACL
// still contains the rwx/rw-/rwx entries.
func (s *datalakeTestSuite) TestPermissionPreservationWithCommit() {
	defer s.cleanupTest()
	// Setup
	s.setupTestHelper("", s.container, false)
	name := generateFileName()
	s.createFileWithData(name, []byte("test data"), fs.FileMode(0767))

	// Stage one block carrying the replacement content.
	payload := []byte("123123")
	blockID := base64.StdEncoding.EncodeToString(common.NewUUIDWithLength(16))
	stageOpts := internal.StageDataOptions{
		Name:   name,
		Id:     blockID,
		Data:   payload,
		Offset: 0,
	}
	err := s.az.StageData(stageOpts)
	s.assert.Nil(err)

	// Commit the block list made up of just that block.
	commitOpts := internal.CommitDataOptions{
		Name:      name,
		List:      []string{blockID},
		BlockSize: 1,
	}
	err = s.az.CommitData(commitOpts)
	s.assert.Nil(err)

	attr, err := s.az.GetAttr(internal.GetAttrOptions{Name: name})
	s.assert.Nil(err)
	s.assert.NotNil(attr)
	s.assert.EqualValues(os.FileMode(0767), attr.Mode)

	acl, err := getACL(s.az.storage.(*Datalake), name)
	s.assert.Nil(err)
	for _, entry := range []string{"user::rwx", "group::rw-", "other::rwx"} {
		s.assert.Contains(acl, entry)
	}
}

// func (s *datalakeTestSuite) TestRAGRS() {
// defer s.cleanupTest()
// // Setup
Expand Down
1 change: 1 addition & 0 deletions setup/advancedConfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ azstorage:
cpk-enabled: true|false <enable client provided key encryption>
cpk-encryption-key: <customer provided base64-encoded AES-256 encryption key value>
cpk-encryption-key-sha256: <customer provided base64-encoded sha256 of the encryption key>
preserve-acl: true|false <preserve ACLs and Permissions set on file during updates>

# Mount all configuration
mountall:
Expand Down

0 comments on commit b07781c

Please sign in to comment.