From 7e602dbf393b793ac804b219cc5dadc3692fb363 Mon Sep 17 00:00:00 2001 From: anuar45 Date: Thu, 21 Nov 2019 12:15:47 +0100 Subject: [PATCH 01/47] stats: show deletes in stats and hide zero stats This shows deletes in the stats. It also doesn't show zero stats in order not to make the stats block too long. --- fs/accounting/stats.go | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/fs/accounting/stats.go b/fs/accounting/stats.go index 35c8d7db4e2f8..a2e44fb604470 100644 --- a/fs/accounting/stats.go +++ b/fs/accounting/stats.go @@ -292,7 +292,7 @@ func (s *StatsInfo) String() string { } } - _, _ = fmt.Fprintf(buf, "%s%10s / %s, %s, %s, ETA %s%s", + _, _ = fmt.Fprintf(buf, "%s%10s / %s, %s, %s, ETA %s%s\n", dateString, fs.SizeSuffix(s.bytes), fs.SizeSuffix(totalSize).Unit("Bytes"), @@ -313,16 +313,23 @@ func (s *StatsInfo) String() string { errorDetails = " (no need to retry)" } - _, _ = fmt.Fprintf(buf, ` -Errors: %10d%s -Checks: %10d / %d, %s -Transferred: %10d / %d, %s -Elapsed time: %10v -`, - s.errors, errorDetails, - s.checks, totalChecks, percent(s.checks, totalChecks), - s.transfers, totalTransfer, percent(s.transfers, totalTransfer), - dtRounded) + // Add only non zero stats + if s.errors != 0 { + _, _ = fmt.Fprintf(buf, "Errors: %10d%s\n", + s.errors, errorDetails) + } + if s.checks != 0 || totalChecks != 0 { + _, _ = fmt.Fprintf(buf, "Checks: %10d / %d, %s\n", + s.errors, totalChecks, percent(s.checks, totalChecks)) + } + if s.deletes != 0 { + _, _ = fmt.Fprintf(buf, "Deleted: %10d\n", s.deletes) + } + if s.transfers != 0 || totalTransfer != 0 { + _, _ = fmt.Fprintf(buf, "Transferred: %10d / %d, %s\n", + s.transfers, totalTransfer, percent(s.transfers, totalTransfer)) + } + _, _ = fmt.Fprintf(buf, "Elapsed time: %10v\n", dtRounded) } // checking and transferring have their own locking so unlock From 4641bd51161df0836b8f468f5ffb25b067cbfd9d Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 21 Nov 2019 11:16:04 +0000 Subject: [PATCH 02/47] Add anuar45 to contributors --- docs/content/authors.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/content/authors.md b/docs/content/authors.md index 1524184f105e9..4597b0cd03ec2 100644 --- a/docs/content/authors.md +++ b/docs/content/authors.md @@ -313,3 +313,4 @@ Contributors * Marco Molteni * Ankur Gupta * Maciej Zimnoch + * anuar45 From 1db31d714907e6720163648fab3175dd5507979e Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 21 Nov 2019 13:25:02 +0000 Subject: [PATCH 03/47] swift: fix parsing of X-Object-Manifest Before this change we forgot to URL decode the X-Object-Manifest in a dynamic large object. This problem was introduced by 2fe8285f89b494ea "swift: reserve segments of dynamic large object when delete objects in container what was enabled versioning." 
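For illustration, the fix boils down to running the manifest value through
net/url.PathUnescape before splitting it into segments container and prefix.
A minimal sketch (the manifest string below is made up):

    package main

    import (
        "fmt"
        "net/url"
    )

    func main() {
        // Hypothetical X-Object-Manifest value with percent-encoded characters
        manifest := "segments_container/path%20with%20spaces/object"
        decoded, err := url.PathUnescape(manifest)
        if err != nil {
            panic(err)
        }
        fmt.Println(decoded) // segments_container/path with spaces/object
    }
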
--- backend/swift/swift.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/backend/swift/swift.go b/backend/swift/swift.go index e23a07685257c..b263a8d3d39da 100644 --- a/backend/swift/swift.go +++ b/backend/swift/swift.go @@ -7,6 +7,7 @@ import ( "context" "fmt" "io" + "net/url" "path" "strconv" "strings" @@ -952,8 +953,8 @@ func (o *Object) isStaticLargeObject() (bool, error) { return o.hasHeader("X-Static-Large-Object") } -func (o *Object) isInContainerVersioning() (bool, error) { - _, headers, err := o.fs.c.Container(o.fs.root) +func (o *Object) isInContainerVersioning(container string) (bool, error) { + _, headers, err := o.fs.c.Container(container) if err != nil { return false, err } @@ -1130,6 +1131,10 @@ func (o *Object) getSegmentsDlo() (segmentsContainer string, prefix string, err return } dirManifest := o.headers["X-Object-Manifest"] + dirManifest, err = url.PathUnescape(dirManifest) + if err != nil { + return + } delimiter := strings.Index(dirManifest, "/") if len(dirManifest) == 0 || delimiter < 0 { err = errors.New("Missing or wrong structure of manifest of Dynamic large object") @@ -1341,7 +1346,7 @@ func (o *Object) Remove(ctx context.Context) (err error) { } // ...then segments if required if isDynamicLargeObject { - isInContainerVersioning, err := o.isInContainerVersioning() + isInContainerVersioning, err := o.isInContainerVersioning(container) if err != nil { return err } From a7d65bd51941e12cb63be968511bc0483ae654ad Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 14 Nov 2019 22:00:30 +0000 Subject: [PATCH 04/47] sftp: add --sftp-skip-links to skip symlinks and non regular files - fixes #3716 This also corrects the symlink detection logic to only check symlink files. Previous to this it was checking all directories too which was making it do more stat calls than was necessary. --- backend/sftp/sftp.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/backend/sftp/sftp.go b/backend/sftp/sftp.go index d426b6af3792a..c528ee4ffbf12 100644 --- a/backend/sftp/sftp.go +++ b/backend/sftp/sftp.go @@ -156,6 +156,11 @@ Home directory can be found in a shared folder called "home" Default: "", Help: "The command used to read sha1 hashes. Leave blank for autodetect.", Advanced: true, + }, { + Name: "skip_links", + Default: false, + Help: "Set to skip any symlinks and any other non regular files.", + Advanced: true, }}, } fs.Register(fsi) @@ -177,6 +182,7 @@ type Options struct { SetModTime bool `config:"set_modtime"` Md5sumCommand string `config:"md5sum_command"` Sha1sumCommand string `config:"sha1sum_command"` + SkipLinks bool `config:"skip_links"` } // Fs stores the interface to the remote SFTP files @@ -600,12 +606,16 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e remote := path.Join(dir, info.Name()) // If file is a symlink (not a regular file is the best cross platform test we can do), do a stat to // pick up the size and type of the destination, instead of the size and type of the symlink. 
- if !info.Mode().IsRegular() { + if !info.Mode().IsRegular() && !info.IsDir() { + if f.opt.SkipLinks { + // skip non regular file if SkipLinks is set + continue + } oldInfo := info info, err = f.stat(remote) if err != nil { if !os.IsNotExist(err) { - fs.Errorf(remote, "stat of non-regular file/dir failed: %v", err) + fs.Errorf(remote, "stat of non-regular file failed: %v", err) } info = oldInfo } From 420ae905b56a0a918fa7fc33a0c31e567edb1e84 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 25 Nov 2019 11:31:44 +0000 Subject: [PATCH 05/47] vfs: make sure existing files opened for write show correct size Before this change if an existing file was opened for write without truncate its size would show as 0 rather than the full size of the file. --- vfs/file.go | 8 ++- vfs/read_write_test.go | 118 +++++++++++++++++++++++++++++++++++------ 2 files changed, 110 insertions(+), 16 deletions(-) diff --git a/vfs/file.go b/vfs/file.go index eeecfddb1ef4d..ab6dbee4009af 100644 --- a/vfs/file.go +++ b/vfs/file.go @@ -37,13 +37,19 @@ type File struct { } // newFile creates a new File +// +// o may be nil func newFile(d *Dir, o fs.Object, leaf string) *File { - return &File{ + f := &File{ d: d, o: o, leaf: leaf, inode: newInode(), } + if o != nil { + f.size = o.Size() + } + return f } // String converts it to printable diff --git a/vfs/read_write_test.go b/vfs/read_write_test.go index 4bdfca57955d7..948d8b08284cc 100644 --- a/vfs/read_write_test.go +++ b/vfs/read_write_test.go @@ -21,16 +21,18 @@ func cleanup(t *testing.T, r *fstest.Run, vfs *VFS) { r.Finalise() } -// Open a file for write -func rwHandleCreateReadOnly(t *testing.T, r *fstest.Run) (*VFS, *RWFileHandle) { +// Create a file and open it with the flags passed in +func rwHandleCreateFlags(t *testing.T, r *fstest.Run, create bool, filename string, flags int) (*VFS, *RWFileHandle) { opt := DefaultOpt opt.CacheMode = CacheModeFull vfs := New(r.Fremote, &opt) - file1 := r.WriteObject(context.Background(), "dir/file1", "0123456789abcdef", t1) - fstest.CheckItems(t, r.Fremote, file1) + if create { + file1 := r.WriteObject(context.Background(), filename, "0123456789abcdef", t1) + fstest.CheckItems(t, r.Fremote, file1) + } - h, err := vfs.OpenFile("dir/file1", os.O_RDONLY, 0777) + h, err := vfs.OpenFile(filename, flags, 0777) require.NoError(t, err) fh, ok := h.(*RWFileHandle) require.True(t, ok) @@ -38,18 +40,14 @@ func rwHandleCreateReadOnly(t *testing.T, r *fstest.Run) (*VFS, *RWFileHandle) { return vfs, fh } +// Open a file for read +func rwHandleCreateReadOnly(t *testing.T, r *fstest.Run) (*VFS, *RWFileHandle) { + return rwHandleCreateFlags(t, r, true, "dir/file1", os.O_RDONLY) +} + // Open a file for write func rwHandleCreateWriteOnly(t *testing.T, r *fstest.Run) (*VFS, *RWFileHandle) { - opt := DefaultOpt - opt.CacheMode = CacheModeFull - vfs := New(r.Fremote, &opt) - - h, err := vfs.OpenFile("file1", os.O_WRONLY|os.O_CREATE, 0777) - require.NoError(t, err) - fh, ok := h.(*RWFileHandle) - require.True(t, ok) - - return vfs, fh + return rwHandleCreateFlags(t, r, false, "file1", os.O_WRONLY|os.O_CREATE) } // read data from the string @@ -494,6 +492,96 @@ func TestRWFileHandleReleaseWrite(t *testing.T) { assert.True(t, fh.closed) } +// check the size of the file through the open file (if not nil) and via stat +func assertSize(t *testing.T, vfs *VFS, fh *RWFileHandle, filepath string, size int64) { + if fh != nil { + assert.Equal(t, size, fh.Size()) + } + fi, err := vfs.Stat(filepath) + require.NoError(t, err) + assert.Equal(t, size, 
fi.Size()) +} + +func TestRWFileHandleSizeTruncateExisting(t *testing.T) { + r := fstest.NewRun(t) + vfs, fh := rwHandleCreateFlags(t, r, true, "dir/file1", os.O_WRONLY|os.O_TRUNC) + defer cleanup(t, r, vfs) + + // check initial size after opening + assertSize(t, vfs, fh, "dir/file1", 0) + + // write some bytes + n, err := fh.Write([]byte("hello")) + assert.NoError(t, err) + assert.Equal(t, 5, n) + + // check size after writing + assertSize(t, vfs, fh, "dir/file1", 5) + + // close + assert.NoError(t, fh.Close()) + + // check size after close + assertSize(t, vfs, nil, "dir/file1", 5) +} + +func TestRWFileHandleSizeCreateExisting(t *testing.T) { + r := fstest.NewRun(t) + vfs, fh := rwHandleCreateFlags(t, r, true, "dir/file1", os.O_WRONLY|os.O_CREATE) + defer cleanup(t, r, vfs) + + // check initial size after opening + assertSize(t, vfs, fh, "dir/file1", 16) + + // write some bytes + n, err := fh.Write([]byte("hello")) + assert.NoError(t, err) + assert.Equal(t, 5, n) + + // check size after writing + assertSize(t, vfs, fh, "dir/file1", 16) + + // write some more bytes + n, err = fh.Write([]byte("helloHELLOhello")) + assert.NoError(t, err) + assert.Equal(t, 15, n) + + // check size after writing + assertSize(t, vfs, fh, "dir/file1", 20) + + // close + assert.NoError(t, fh.Close()) + + // check size after close + assertSize(t, vfs, nil, "dir/file1", 20) +} + +func TestRWFileHandleSizeCreateNew(t *testing.T) { + r := fstest.NewRun(t) + vfs, fh := rwHandleCreateFlags(t, r, false, "file1", os.O_WRONLY|os.O_CREATE) + defer cleanup(t, r, vfs) + + // check initial size after opening + assertSize(t, vfs, fh, "file1", 0) + + // write some bytes + n, err := fh.Write([]byte("hello")) + assert.NoError(t, err) + assert.Equal(t, 5, n) + + // check size after writing + assertSize(t, vfs, fh, "file1", 5) + + // check size after writing + assertSize(t, vfs, fh, "file1", 5) + + // close + assert.NoError(t, fh.Close()) + + // check size after close + assertSize(t, vfs, nil, "file1", 5) +} + func testRWFileHandleOpenTest(t *testing.T, vfs *VFS, test *openTest) { fileName := "open-test-file" From c3751e9a503a2de66c908ab299c21b66dd80c865 Mon Sep 17 00:00:00 2001 From: SezalAgrawal Date: Tue, 26 Nov 2019 16:28:52 +0530 Subject: [PATCH 06/47] operations: fix dedupe continuing on errors like insufficientFilePermisson - fixes #3470 * Fix dedupe on merge continuing on errors like insufficientFilePermisson * Sorted the directories to remove recursion logic --- fs/operations/dedupe.go | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/operations/dedupe.go b/fs/operations/dedupe.go index 7dfc9afc4a666..2851c219e7e24 100644 --- a/fs/operations/dedupe.go +++ b/fs/operations/dedupe.go @@ -211,12 +211,18 @@ func dedupeFindDuplicateDirs(ctx context.Context, f fs.Fs) ([][]fs.Directory, er if err != nil { return nil, errors.Wrap(err, "find duplicate dirs") } - duplicateDirs := [][]fs.Directory{} - for _, ds := range dirs { + // make sure parents are before children + duplicateNames := []string{} + for name, ds := range dirs { if len(ds) > 1 { - duplicateDirs = append(duplicateDirs, ds) + duplicateNames = append(duplicateNames, name) } } + sort.Strings(duplicateNames) + duplicateDirs := [][]fs.Directory{} + for _, name := range duplicateNames { + duplicateDirs = append(duplicateDirs, dirs[name]) + } return duplicateDirs, nil } @@ -235,7 +241,8 @@ func dedupeMergeDuplicateDirs(ctx context.Context, f fs.Fs, duplicateDirs [][]fs fs.Infof(dirs[0], "Merging contents of duplicate 
directories") err := mergeDirs(ctx, dirs) if err != nil { - return errors.Wrap(err, "merge duplicate dirs") + err = fs.CountError(err) + fs.Errorf(nil, "merge duplicate dirs: %v", err) } } else { fs.Infof(dirs[0], "NOT Merging contents of duplicate directories as --dry-run") @@ -251,23 +258,16 @@ func dedupeMergeDuplicateDirs(ctx context.Context, f fs.Fs, duplicateDirs [][]fs func Deduplicate(ctx context.Context, f fs.Fs, mode DeduplicateMode) error { fs.Infof(f, "Looking for duplicates using %v mode.", mode) - // Find duplicate directories first and fix them - repeat - // until all fixed - for { - duplicateDirs, err := dedupeFindDuplicateDirs(ctx, f) - if err != nil { - return err - } - if len(duplicateDirs) == 0 { - break - } + // Find duplicate directories first and fix them + duplicateDirs, err := dedupeFindDuplicateDirs(ctx, f) + if err != nil { + return err + } + if len(duplicateDirs) != 0 { err = dedupeMergeDuplicateDirs(ctx, f, duplicateDirs) if err != nil { return err } - if fs.Config.DryRun { - break - } } // find a hash to use @@ -275,7 +275,7 @@ func Deduplicate(ctx context.Context, f fs.Fs, mode DeduplicateMode) error { // Now find duplicate files files := map[string][]fs.Object{} - err := walk.ListR(ctx, f, "", true, fs.Config.MaxDepth, walk.ListObjects, func(entries fs.DirEntries) error { + err = walk.ListR(ctx, f, "", true, fs.Config.MaxDepth, walk.ListObjects, func(entries fs.DirEntries) error { entries.ForObject(func(o fs.Object) { remote := o.Remote() files[remote] = append(files[remote], o) From 11f44cff50603e985e1a28e2f6064bd7d090408a Mon Sep 17 00:00:00 2001 From: Garry McNulty Date: Mon, 18 Nov 2019 21:34:10 +0000 Subject: [PATCH 07/47] drive: add --drive-use-shared-date to use date file was shared instead of modified date - fixes #3624 --- backend/drive/drive.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/backend/drive/drive.go b/backend/drive/drive.go index 4aa82214e14aa..1e296b5aef1ec 100644 --- a/backend/drive/drive.go +++ b/backend/drive/drive.go @@ -326,6 +326,17 @@ Photos folder" option in your google drive settings. You can then copy or move the photos locally and use the date the image was taken (created) set as the modification date.`, Advanced: true, + }, { + Name: "use_shared_date", + Default: false, + Help: `Use date file was shared instead of modified date. + +Note that, as with "--drive-use-created-date", this flag may have +unexpected consequences when uploading/downloading files. 
+ +If both this flag and "--drive-use-created-date" are set, the created +date is used.`, + Advanced: true, }, { Name: "list_chunk", Default: 1000, @@ -463,6 +474,7 @@ type Options struct { ImportExtensions string `config:"import_formats"` AllowImportNameChange bool `config:"allow_import_name_change"` UseCreatedDate bool `config:"use_created_date"` + UseSharedDate bool `config:"use_shared_date"` ListChunk int64 `config:"list_chunk"` Impersonate string `config:"impersonate"` AlternateExport bool `config:"alternate_export"` @@ -694,6 +706,9 @@ func (f *Fs) list(ctx context.Context, dirIDs []string, title string, directorie if f.opt.AuthOwnerOnly { fields += ",owners" } + if f.opt.UseSharedDate { + fields += ",sharedWithMeTime" + } if f.opt.SkipChecksumGphotos { fields += ",spaces" } @@ -1095,6 +1110,8 @@ func (f *Fs) newBaseObject(remote string, info *drive.File) baseObject { modifiedDate := info.ModifiedTime if f.opt.UseCreatedDate { modifiedDate = info.CreatedTime + } else if f.opt.UseSharedDate && info.SharedWithMeTime != "" { + modifiedDate = info.SharedWithMeTime } size := info.Size if f.opt.SizeAsQuota { From d72f3e31c0a82bf49079a40f5b933096a7498d1d Mon Sep 17 00:00:00 2001 From: Marco Molteni Date: Fri, 22 Nov 2019 21:09:16 +0100 Subject: [PATCH 08/47] docs/install: explain how to workaround macOS Gatekeeper requiring notarization Fix #3689 --- docs/content/install.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/content/install.md b/docs/content/install.md index adc77663f259e..e054ec1d50423 100644 --- a/docs/content/install.md +++ b/docs/content/install.md @@ -56,7 +56,14 @@ Run `rclone config` to setup. See [rclone config docs](/docs/) for more details. rclone config -## macOS installation from precompiled binary ## +## macOS installation with brew ## + + brew install rclone + +## macOS installation from precompiled binary, using curl ## + +To avoid problems with macOS gatekeeper enforcing the binary to be signed and +notarized it is enough to download with `curl`. Download the latest version of rclone. @@ -81,6 +88,19 @@ Run `rclone config` to setup. See [rclone config docs](/docs/) for more details. rclone config +## macOS installation from precompiled binary, using a web browser ## + +When downloading a binary with a web browser, the browser will set the macOS +gatekeeper quarantine attribute. Starting from Catalina, when attempting to run +`rclone`, a pop-up will appear saying: + + “rclone” cannot be opened because the developer cannot be verified. + macOS cannot verify that this app is free from malware. + +The simplest fix is to run + + xattr -d com.apple.quarantine rclone + ## Install with docker ## The rclone maintains a [docker image for rclone](https://hub.docker.com/r/rclone/rclone). 
From 4195bd78801cd138a3c81f922cffbe852621b3b0 Mon Sep 17 00:00:00 2001 From: buengese Date: Wed, 20 Nov 2019 00:10:38 +0100 Subject: [PATCH 09/47] jottacloud: use new auth method used by official client --- backend/jottacloud/api/types.go | 21 ++- backend/jottacloud/jottacloud.go | 219 ++++++++++++------------------- 2 files changed, 100 insertions(+), 140 deletions(-) diff --git a/backend/jottacloud/api/types.go b/backend/jottacloud/api/types.go index e2564802225eb..83d4db738ec50 100644 --- a/backend/jottacloud/api/types.go +++ b/backend/jottacloud/api/types.go @@ -46,13 +46,26 @@ func (t Time) String() string { return time.Time(t).Format(timeFormat) } // APIString returns Time string in Jottacloud API format func (t Time) APIString() string { return time.Time(t).Format(apiTimeFormat) } +// LoginToken is struct representing the login token generated in the WebUI +type LoginToken struct { + Username string `json:"username"` + Realm string `json:"realm"` + WellKnownLink string `json:"well_known_link"` + AuthToken string `json:"auth_token"` +} + // TokenJSON is the struct representing the HTTP response from OAuth2 // providers returning a token in JSON form. type TokenJSON struct { - AccessToken string `json:"access_token"` - TokenType string `json:"token_type"` - RefreshToken string `json:"refresh_token"` - ExpiresIn int32 `json:"expires_in"` // at least PayPal returns string, while most return number + AccessToken string `json:"access_token"` + ExpiresIn int32 `json:"expires_in"` // at least PayPal returns string, while most return number + RefreshExpiresIn int32 `json:"refresh_expires_in"` + RefreshToken string `json:"refresh_token"` + TokenType string `json:"token_type"` + IDToken string `json:"id_token"` + NotBeforePolicy int32 `json:"not-before-policy"` + SessionState string `json:"session_state"` + Scope string `json:"scope"` } // JSON structures returned by new API diff --git a/backend/jottacloud/jottacloud.go b/backend/jottacloud/jottacloud.go index 07aef1042ebfe..a19b81f9307ac 100644 --- a/backend/jottacloud/jottacloud.go +++ b/backend/jottacloud/jottacloud.go @@ -4,12 +4,13 @@ import ( "bytes" "context" "crypto/md5" + "encoding/base64" "encoding/hex" + "encoding/json" "fmt" "io" "io/ioutil" "log" - "math/rand" "net/http" "net/url" "os" @@ -25,7 +26,6 @@ import ( "github.com/rclone/rclone/fs/config" "github.com/rclone/rclone/fs/config/configmap" "github.com/rclone/rclone/fs/config/configstruct" - "github.com/rclone/rclone/fs/config/obscure" "github.com/rclone/rclone/fs/encodings" "github.com/rclone/rclone/fs/fserrors" "github.com/rclone/rclone/fs/fshttp" @@ -41,29 +41,25 @@ const enc = encodings.JottaCloud // Globals const ( - minSleep = 10 * time.Millisecond - maxSleep = 2 * time.Second - decayConstant = 2 // bigger for slower decay, exponential - defaultDevice = "Jotta" - defaultMountpoint = "Archive" - rootURL = "https://www.jottacloud.com/jfs/" - apiURL = "https://api.jottacloud.com/" - baseURL = "https://www.jottacloud.com/" - tokenURL = "https://api.jottacloud.com/auth/v1/token" - registerURL = "https://api.jottacloud.com/auth/v1/register" - cachePrefix = "rclone-jcmd5-" - rcloneClientID = "nibfk8biu12ju7hpqomr8b1e40" - rcloneEncryptedClientSecret = "Vp8eAv7eVElMnQwN-kgU9cbhgApNDaMqWdlDi5qFydlQoji4JBxrGMF2" - configClientID = "client_id" - configClientSecret = "client_secret" - configDevice = "device" - configMountpoint = "mountpoint" - charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + minSleep = 10 * time.Millisecond + maxSleep = 2 * time.Second + 
decayConstant = 2 // bigger for slower decay, exponential + defaultDevice = "Jotta" + defaultMountpoint = "Archive" + rootURL = "https://www.jottacloud.com/jfs/" + apiURL = "https://api.jottacloud.com/" + baseURL = "https://www.jottacloud.com/" + tokenURL = "https://id.jottacloud.com/auth/realms/jottacloud/protocol/openid-connect/token" + cachePrefix = "rclone-jcmd5-" + configDevice = "device" + configMountpoint = "mountpoint" + configVersion = 1 ) var ( // Description of how to auth for this app for a personal account oauthConfig = &oauth2.Config{ + ClientID: "jottacli", Endpoint: oauth2.Endpoint{ AuthURL: tokenURL, TokenURL: tokenURL, @@ -81,43 +77,38 @@ func init() { NewFs: NewFs, Config: func(name string, m configmap.Mapper) { ctx := context.TODO() - tokenString, ok := m.Get("token") - if ok && tokenString != "" { - fmt.Printf("Already have a token - refresh?\n") - if !config.Confirm(false) { - return - } - } - srv := rest.NewClient(fshttp.NewClient(fs.Config)) - fmt.Printf("\nDo you want to create a machine specific API key?\n\nRclone has it's own Jottacloud API KEY which works fine as long as one only uses rclone on a single machine. When you want to use rclone with this account on more than one machine it's recommended to create a machine specific API key. These keys can NOT be shared between machines.\n\n") - if config.Confirm(false) { - deviceRegistration, err := registerDevice(ctx, srv) + refresh := false + if version, ok := m.Get("configVersion"); ok { + ver, err := strconv.Atoi(version) if err != nil { - log.Fatalf("Failed to register device: %v", err) + log.Fatalf("Failed to parse config version - corrupted config") } - - m.Set(configClientID, deviceRegistration.ClientID) - m.Set(configClientSecret, obscure.MustObscure(deviceRegistration.ClientSecret)) - fs.Debugf(nil, "Got clientID '%s' and clientSecret '%s'", deviceRegistration.ClientID, deviceRegistration.ClientSecret) + refresh = ver != configVersion + } else { + refresh = true } - clientID, ok := m.Get(configClientID) - if !ok { - clientID = rcloneClientID - } - clientSecret, ok := m.Get(configClientSecret) - if !ok { - clientSecret = rcloneEncryptedClientSecret + if refresh { + fmt.Printf("Config outdated - refreshing\n") + } else { + tokenString, ok := m.Get("token") + if ok && tokenString != "" { + fmt.Printf("Already have a token - refresh?\n") + if !config.Confirm(false) { + return + } + } } - oauthConfig.ClientID = clientID - oauthConfig.ClientSecret = obscure.MustReveal(clientSecret) - fmt.Printf("Username> ") - username := config.ReadLine() - password := config.GetPassword("Your Jottacloud password is only required during setup and will not be stored.") + clientConfig := *fs.Config + clientConfig.UserAgent = "JottaCli 0.6.18626 windows-amd64" + srv := rest.NewClient(fshttp.NewClient(&clientConfig)) - token, err := doAuth(ctx, srv, username, password) + fmt.Printf("Login Token> ") + loginToken := config.ReadLine() + + token, err := doAuth(ctx, srv, loginToken) if err != nil { log.Fatalf("Failed to get oauth token: %s", err) } @@ -143,6 +134,8 @@ func init() { m.Set(configDevice, device) m.Set(configMountpoint, mountpoint) } + + m.Set("configVersion", strconv.Itoa(configVersion)) }, Options: []fs.Option{{ Name: "md5_memory_limit", @@ -249,67 +242,51 @@ func shouldRetry(resp *http.Response, err error) (bool, error) { return fserrors.ShouldRetry(err) || fserrors.ShouldRetryHTTP(resp, retryErrorCodes), err } -// registerDevice register a new device for use with the jottacloud API -func registerDevice(ctx 
context.Context, srv *rest.Client) (reg *api.DeviceRegistrationResponse, err error) { - // random generator to generate random device names - seededRand := rand.New(rand.NewSource(time.Now().UnixNano())) - randonDeviceNamePartLength := 21 - randomDeviceNamePart := make([]byte, randonDeviceNamePartLength) - for i := range randomDeviceNamePart { - randomDeviceNamePart[i] = charset[seededRand.Intn(len(charset))] +// doAuth runs the actual token request +func doAuth(ctx context.Context, srv *rest.Client, loginTokenBase64 string) (token oauth2.Token, err error) { + loginTokenBytes, err := base64.StdEncoding.DecodeString(loginTokenBase64) + if err != nil { + return token, err } - randomDeviceName := "rclone-" + string(randomDeviceNamePart) - fs.Debugf(nil, "Trying to register device '%s'", randomDeviceName) - values := url.Values{} - values.Set("device_id", randomDeviceName) + var loginToken api.LoginToken + decoder := json.NewDecoder(bytes.NewReader(loginTokenBytes)) + err = decoder.Decode(&loginToken) + if err != nil { + return token, err + } + // we don't seem to need any data from this link but the API is not happy if skip it opts := rest.Opts{ - Method: "POST", - RootURL: registerURL, - ContentType: "application/x-www-form-urlencoded", - ExtraHeaders: map[string]string{"Authorization": "Bearer c2xrZmpoYWRsZmFramhkc2xma2phaHNkbGZramhhc2xkZmtqaGFzZGxrZmpobGtq"}, - Parameters: values, + Method: "GET", + RootURL: loginToken.WellKnownLink, + NoResponse: true, + } + _, err = srv.Call(ctx, &opts) + if err != nil { + return token, err } - var deviceRegistration *api.DeviceRegistrationResponse - _, err = srv.CallJSON(ctx, &opts, nil, &deviceRegistration) - return deviceRegistration, err -} - -// doAuth runs the actual token request -func doAuth(ctx context.Context, srv *rest.Client, username, password string) (token oauth2.Token, err error) { // prepare out token request with username and password values := url.Values{} - values.Set("grant_type", "PASSWORD") - values.Set("password", password) - values.Set("username", username) - values.Set("client_id", oauthConfig.ClientID) - values.Set("client_secret", oauthConfig.ClientSecret) - opts := rest.Opts{ + values.Set("client_id", "jottacli") + values.Set("grant_type", "password") + values.Set("password", loginToken.AuthToken) + values.Set("scope", "offline_access+openid") + values.Set("username", loginToken.Username) + values.Encode() + opts = rest.Opts{ Method: "POST", RootURL: oauthConfig.Endpoint.AuthURL, ContentType: "application/x-www-form-urlencoded", - Parameters: values, + Body: strings.NewReader(values.Encode()), } // do the first request var jsonToken api.TokenJSON - resp, err := srv.CallJSON(ctx, &opts, nil, &jsonToken) + _, err = srv.CallJSON(ctx, &opts, nil, &jsonToken) if err != nil { - // if 2fa is enabled the first request is expected to fail. 
We will do another request with the 2fa code as an additional http header - if resp != nil { - if resp.Header.Get("X-JottaCloud-OTP") == "required; SMS" { - fmt.Printf("This account uses 2 factor authentication you will receive a verification code via SMS.\n") - fmt.Printf("Enter verification code> ") - authCode := config.ReadLine() - - authCode = strings.Replace(authCode, "-", "", -1) // remove any "-" contained in the code so we have a 6 digit number - opts.ExtraHeaders = make(map[string]string) - opts.ExtraHeaders["X-Jottacloud-Otp"] = authCode - resp, err = srv.CallJSON(ctx, &opts, nil, &jsonToken) - } - } + return token, err } token.AccessToken = jsonToken.AccessToken @@ -471,29 +448,6 @@ func (f *Fs) filePath(file string) string { return urlPathEscape(f.filePathRaw(file)) } -// Jottacloud requires the grant_type 'refresh_token' string -// to be uppercase and throws a 400 Bad Request if we use the -// lower case used by the oauth2 module -// -// This filter catches all refresh requests, reads the body, -// changes the case and then sends it on -func grantTypeFilter(req *http.Request) { - if tokenURL == req.URL.String() { - // read the entire body - refreshBody, err := ioutil.ReadAll(req.Body) - if err != nil { - return - } - _ = req.Body.Close() - - // make the refresh token upper case - refreshBody = []byte(strings.Replace(string(refreshBody), "grant_type=refresh_token", "grant_type=REFRESH_TOKEN", 1)) - - // set the new ReadCloser (with a dummy Close()) - req.Body = ioutil.NopCloser(bytes.NewReader(refreshBody)) - } -} - // NewFs constructs an Fs from the path, container:path func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) { ctx := context.TODO() @@ -504,30 +458,23 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) { return nil, err } - rootIsDir := strings.HasSuffix(root, "/") - root = parsePath(root) - - clientID, ok := m.Get(configClientID) - if !ok { - clientID = rcloneClientID + var ok bool + var version string + if version, ok = m.Get("configVersion"); ok { + ver, err := strconv.Atoi(version) + if err != nil { + return nil, errors.New("Failed to parse config version") + } + ok = ver == configVersion } - clientSecret, ok := m.Get(configClientSecret) if !ok { - clientSecret = rcloneEncryptedClientSecret + return nil, errors.New("Outdated config - please reconfigure this backend") } - oauthConfig.ClientID = clientID - oauthConfig.ClientSecret = obscure.MustReveal(clientSecret) - // the oauth client for the api servers needs - // a filter to fix the grant_type issues (see above) + rootIsDir := strings.HasSuffix(root, "/") + root = parsePath(root) + baseClient := fshttp.NewClient(fs.Config) - if do, ok := baseClient.Transport.(interface { - SetRequestFilter(f func(req *http.Request)) - }); ok { - do.SetRequestFilter(grantTypeFilter) - } else { - fs.Debugf(name+":", "Couldn't add request filter - uploads will fail") - } oAuthClient, ts, err := oauthutil.NewClientWithBaseClient(name, m, oauthConfig, baseClient) if err != nil { return nil, errors.Wrap(err, "Failed to configure Jottacloud oauth client") From 8f33c932f2fd9cfce09316601b66e0e05e980f26 Mon Sep 17 00:00:00 2001 From: buengese Date: Wed, 20 Nov 2019 00:11:19 +0100 Subject: [PATCH 10/47] jottacloud: update docs for new auth method --- docs/content/jottacloud.md | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/docs/content/jottacloud.md b/docs/content/jottacloud.md index e6c7a1f2ee89e..2b7b19b5d74f2 100644 --- a/docs/content/jottacloud.md +++ 
b/docs/content/jottacloud.md @@ -11,7 +11,7 @@ Paths are specified as `remote:path` Paths may be as deep as required, eg `remote:directory/subdirectory`. -To configure Jottacloud you will need to enter your username and password and select a mountpoint. +To configure Jottacloud you will need to generate a personal security token in the Jottacloud web inteface. You will the option to do in your [account security settings](https://www.jottacloud.com/web/secure). Note that the web inteface may refer to this token as a JottaCli token. Here is an example of how to make a remote called `remote`. First run: @@ -42,16 +42,7 @@ n) No y/n> n Remote config -Do you want to create a machine specific API key? - -Rclone has it's own Jottacloud API KEY which works fine as long as one only uses rclone on a single machine. When you want to use rclone with this account on more than one machine it's recommended to create a machine specific API key. These keys can NOT be shared between machines. - -y) Yes -n) No -y/n> y -Username> 0xC4KE@gmail.com -Your Jottacloud password is only required during setup and will not be stored. -password: +Login Token> Do you want to use a non standard device/mountpoint e.g. for accessing files uploaded using the official Jottacloud client? @@ -74,11 +65,10 @@ Mountpoints> 1 [jotta] type = jottacloud user = 0xC4KE@gmail.com -client_id = ..... -client_secret = ........ token = {........} device = Jotta mountpoint = Archive +configVersion = 1 -------------------- y) Yes this is OK e) Edit this remote @@ -102,7 +92,7 @@ To copy a local directory to an Jottacloud directory called backup ### Devices and Mountpoints ### The official Jottacloud client registers a device for each computer you install it on and then creates a mountpoint for each folder you select for Backup. -The web interface uses a special device called Jotta for the Archive, Sync and Shared mountpoints. In most cases you'll want to use the Jotta/Archive device/mounpoint however if you want to access files uploaded by the official rclone provides the option to select other devices and mountpoints during config. +The web interface uses a special device called Jotta for the Archive, Sync and Shared mountpoints. In most cases you'll want to use the Jotta/Archive device/mounpoint however if you want to access files uploaded by any of the official clients rclone provides the option to select other devices and mountpoints during config. ### --fast-list ### From 162fdfe4552cd9cded599bc489b80ad97755f626 Mon Sep 17 00:00:00 2001 From: Fernando Date: Wed, 27 Nov 2019 12:40:24 +0100 Subject: [PATCH 11/47] mount: document remotes as network shares on Windows Provided instructions for mounting remotes as network shares/network drives in a Windows environment --- docs/content/commands/rclone_mount.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/content/commands/rclone_mount.md b/docs/content/commands/rclone_mount.md index 925ac7136c8aa..f240755f990d0 100644 --- a/docs/content/commands/rclone_mount.md +++ b/docs/content/commands/rclone_mount.md @@ -65,6 +65,28 @@ infrastructure](https://github.com/billziss-gh/winfsp/wiki/WinFsp-Service-Archit which creates drives accessible for everyone on the system or alternatively using [the nssm service manager](https://nssm.cc/usage). +#### Mount as a network drive + +By default, rclone will mount the remote as a normal drive. 
However, you can also mount it as a **Network Drive** +(or **Network Share**, as mentioned in some places) + +Unlike other systems, Windows provides a different filesystem type for network drives. +Windows and other programs treat the network drives and fixed/removable drives differently: +In network drives, many I/O operations are optimized, as the high latency and low reliability +(compared to a normal drive) of a network is expected. + +Although many people prefer network shares to be mounted as normal system drives, this might cause +some issues, such as programs not working as expected or freezes and errors while operating with the +mounted remote in Windows Explorer. If you experience any of those, consider mounting rclone remotes as network shares, +as Windows expects normal drives to be fast and reliable, while cloud storage is far from that. +See also [Limitations](#limitations) section below for more info + +Add `--fuse-flag --VolumePrefix=\server\share` to your `mount` command, **replacing `share` with any other +name of your choice if you are mounting more than one remote**. Otherwise, the mountpoints will conflict and +your mounted filesystems will overlap. + +[Read more about drive mapping](https://en.wikipedia.org/wiki/Drive_mapping) + ### Limitations Without the use of "--vfs-cache-mode" this can only write files From ed39adc65b62af6a6f6d9b43792b06bafd2e19dd Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Wed, 27 Nov 2019 11:40:44 +0000 Subject: [PATCH 12/47] Add Fernando to contributors --- docs/content/authors.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/content/authors.md b/docs/content/authors.md index 4597b0cd03ec2..9ddf34302bddf 100644 --- a/docs/content/authors.md +++ b/docs/content/authors.md @@ -314,3 +314,4 @@ Contributors * Ankur Gupta * Maciej Zimnoch * anuar45 + * Fernando From 4fbc90d1150bd0a53bfccd9a0d6f519c180ef0b9 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Sun, 24 Nov 2019 13:08:06 +0000 Subject: [PATCH 13/47] webdav: make nextcloud only upload SHA1 checksums When using nextcloud, before this change we only uploaded one of SHA1 or MD5 checksum in the OC-Checksum header with preference to SHA1 if both were set. This makes the MD5 checksums read as empty string which makes syncing with checksums less useful than they should be as all the MD5 checksums are blank. This change makes it so that we only upload the SHA1 to nextcloud. The behaviour of owncloud is unchanged as owncloud uses the checksum as an upload integrity check only and calculates its own checksums. 
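For illustration, the OC-Checksum header sent on upload has the form
"SHA1:<lowercase hex digest>". A minimal Go sketch of building such a request
(the endpoint and file contents are made up):

    package main

    import (
        "bytes"
        "crypto/sha1"
        "encoding/hex"
        "fmt"
        "net/http"
    )

    func main() {
        data := []byte("file contents")
        sum := sha1.Sum(data)

        // Hypothetical nextcloud WebDAV endpoint
        req, err := http.NewRequest("PUT", "https://nextcloud.example.com/remote.php/webdav/file.bin", bytes.NewReader(data))
        if err != nil {
            panic(err)
        }
        req.Header.Set("OC-Checksum", "SHA1:"+hex.EncodeToString(sum[:]))
        fmt.Println(req.Header.Get("OC-Checksum"))
    }
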
See: https://forum.rclone.org/t/how-to-specify-hash-method-to-checksum/13055 --- backend/webdav/webdav.go | 60 +++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/backend/webdav/webdav.go b/backend/webdav/webdav.go index 6d6d20b49fca4..aedd6f70a7fc1 100644 --- a/backend/webdav/webdav.go +++ b/backend/webdav/webdav.go @@ -113,7 +113,8 @@ type Fs struct { canStream bool // set if can stream useOCMtime bool // set if can use X-OC-Mtime retryWithZeroDepth bool // some vendors (sharepoint) won't list files when Depth is 1 (our default) - hasChecksums bool // set if can use owncloud style checksums + hasMD5 bool // set if can use owncloud style checksums for MD5 + hasSHA1 bool // set if can use owncloud style checksums for SHA1 } // Object describes a webdav object @@ -215,7 +216,7 @@ func (f *Fs) readMetaDataForPath(ctx context.Context, path string, depth string) }, NoRedirect: true, } - if f.hasChecksums { + if f.hasMD5 || f.hasSHA1 { opts.Body = bytes.NewBuffer(owncloudProps) } var result api.Multistatus @@ -430,11 +431,12 @@ func (f *Fs) setQuirks(ctx context.Context, vendor string) error { f.canStream = true f.precision = time.Second f.useOCMtime = true - f.hasChecksums = true + f.hasMD5 = true + f.hasSHA1 = true case "nextcloud": f.precision = time.Second f.useOCMtime = true - f.hasChecksums = true + f.hasSHA1 = true case "sharepoint": // To mount sharepoint, two Cookies are required // They have to be set instead of BasicAuth @@ -536,7 +538,7 @@ func (f *Fs) listAll(ctx context.Context, dir string, directoriesOnly bool, file "Depth": depth, }, } - if f.hasChecksums { + if f.hasMD5 || f.hasSHA1 { opts.Body = bytes.NewBuffer(owncloudProps) } var result api.Multistatus @@ -945,10 +947,14 @@ func (f *Fs) DirMove(ctx context.Context, src fs.Fs, srcRemote, dstRemote string // Hashes returns the supported hash sets. 
func (f *Fs) Hashes() hash.Set { - if f.hasChecksums { - return hash.NewHashSet(hash.MD5, hash.SHA1) + hashes := hash.Set(hash.None) + if f.hasMD5 { + hashes.Add(hash.MD5) } - return hash.Set(hash.None) + if f.hasSHA1 { + hashes.Add(hash.SHA1) + } + return hashes } // About gets quota information @@ -1015,13 +1021,11 @@ func (o *Object) Remote() string { // Hash returns the SHA1 or MD5 of an object returning a lowercase hex string func (o *Object) Hash(ctx context.Context, t hash.Type) (string, error) { - if o.fs.hasChecksums { - switch t { - case hash.SHA1: - return o.sha1, nil - case hash.MD5: - return o.md5, nil - } + if t == hash.MD5 && o.fs.hasMD5 { + return o.md5, nil + } + if t == hash.SHA1 && o.fs.hasSHA1 { + return o.sha1, nil } return "", hash.ErrUnsupported } @@ -1042,10 +1046,14 @@ func (o *Object) setMetaData(info *api.Prop) (err error) { o.hasMetaData = true o.size = info.Size o.modTime = time.Time(info.Modified) - if o.fs.hasChecksums { + if o.fs.hasMD5 || o.fs.hasSHA1 { hashes := info.Hashes() - o.sha1 = hashes[hash.SHA1] - o.md5 = hashes[hash.MD5] + if o.fs.hasSHA1 { + o.sha1 = hashes[hash.SHA1] + } + if o.fs.hasMD5 { + o.md5 = hashes[hash.MD5] + } } return nil } @@ -1126,19 +1134,21 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op ContentLength: &size, // FIXME this isn't necessary with owncloud - See https://github.com/nextcloud/nextcloud-snap/issues/365 ContentType: fs.MimeType(ctx, src), } - if o.fs.useOCMtime || o.fs.hasChecksums { + if o.fs.useOCMtime || o.fs.hasMD5 || o.fs.hasSHA1 { opts.ExtraHeaders = map[string]string{} if o.fs.useOCMtime { opts.ExtraHeaders["X-OC-Mtime"] = fmt.Sprintf("%f", float64(src.ModTime(ctx).UnixNano())/1e9) } - if o.fs.hasChecksums { - // Set an upload checksum - prefer SHA1 - // - // This is used as an upload integrity test. If we set - // only SHA1 here, owncloud will calculate the MD5 too. + // Set one upload checksum + // Owncloud uses one checksum only to check the upload and stores its own SHA1 and MD5 + // Nextcloud stores the checksum you supply (SHA1 or MD5) but only stores one + if o.fs.hasSHA1 { if sha1, _ := src.Hash(ctx, hash.SHA1); sha1 != "" { opts.ExtraHeaders["OC-Checksum"] = "SHA1:" + sha1 - } else if md5, _ := src.Hash(ctx, hash.MD5); md5 != "" { + } + } + if o.fs.hasMD5 && opts.ExtraHeaders["OC-Checksum"] == "" { + if md5, _ := src.Hash(ctx, hash.MD5); md5 != "" { opts.ExtraHeaders["OC-Checksum"] = "MD5:" + md5 } } From 705e4694ede983f0d7e813475a677e0b6bd2bf91 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 21 Nov 2019 11:06:20 +0000 Subject: [PATCH 14/47] webdav: fix case of "Bearer" in Authorization: header to agree with RFC Before this change rclone used "Authorization: BEARER token". 
However according the the RFC this should be "Bearer" https://tools.ietf.org/html/rfc6750#section-2.1 This changes it to "Authorization: Bearer token" Fixes #3751 and interop with Salesforce Webdav server --- backend/webdav/webdav.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/webdav/webdav.go b/backend/webdav/webdav.go index aedd6f70a7fc1..4a21ea05fb33f 100644 --- a/backend/webdav/webdav.go +++ b/backend/webdav/webdav.go @@ -384,7 +384,7 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) { // sets the BearerToken up func (f *Fs) setBearerToken(token string) { f.opt.BearerToken = token - f.srv.SetHeader("Authorization", "BEARER "+token) + f.srv.SetHeader("Authorization", "Bearer "+token) } // fetch the bearer token using the command From 33c80bbb96aa7dc3b20393553931f094f0e04f04 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 28 Nov 2019 10:03:48 +0000 Subject: [PATCH 15/47] jottacloud: add URL to generate Login Token to config wizard --- backend/jottacloud/jottacloud.go | 1 + docs/content/jottacloud.md | 1 + 2 files changed, 2 insertions(+) diff --git a/backend/jottacloud/jottacloud.go b/backend/jottacloud/jottacloud.go index a19b81f9307ac..91b987a57f038 100644 --- a/backend/jottacloud/jottacloud.go +++ b/backend/jottacloud/jottacloud.go @@ -105,6 +105,7 @@ func init() { clientConfig.UserAgent = "JottaCli 0.6.18626 windows-amd64" srv := rest.NewClient(fshttp.NewClient(&clientConfig)) + fmt.Printf("Generate a personal login token here: https://www.jottacloud.com/web/secure\n") fmt.Printf("Login Token> ") loginToken := config.ReadLine() diff --git a/docs/content/jottacloud.md b/docs/content/jottacloud.md index 2b7b19b5d74f2..f1c027326a71a 100644 --- a/docs/content/jottacloud.md +++ b/docs/content/jottacloud.md @@ -42,6 +42,7 @@ n) No y/n> n Remote config +Generate a personal login token here: https://www.jottacloud.com/web/secure Login Token> Do you want to use a non standard device/mountpoint e.g. for accessing files uploaded using the official Jottacloud client? From d3b0bed09124082f40c22d2f7fecec250c0a507b Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Wed, 27 Nov 2019 16:10:24 +0000 Subject: [PATCH 16/47] drive: make sure invalid auth for teamdrives always reports an error For some reason Google doesn't return an error if you use a service account with the wrong permissions to list a team drive. This gives the user the false impression that the drive is empty. This change: - calls teamdrives get on rclone about - calls teamdrives get on a listing of the root which returned no entries These will both detect a team drive which has the incorrect auth and workaround the issue. 
Fixes: #3763 See: https://forum.rclone.org/t/rclone-missing-error-code-when-sas-have-no-permission/13086 See: https://forum.rclone.org/t/need-need-bug-verification-rclone-about-doesnt-work-on-teamdrives-empty-output/13105 --- backend/drive/drive.go | 47 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/backend/drive/drive.go b/backend/drive/drive.go index 1e296b5aef1ec..a12c2f9245134 100644 --- a/backend/drive/drive.go +++ b/backend/drive/drive.go @@ -1480,6 +1480,14 @@ func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err e if iErr != nil { return nil, iErr } + // If listing the root of a teamdrive and got no entries, + // double check we have access + if f.isTeamDrive && len(entries) == 0 && f.root == "" && dir == "" { + err = f.teamDriveOK(ctx) + if err != nil { + return nil, err + } + } return entries, nil } @@ -1617,6 +1625,7 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) ( out := make(chan error, fs.Config.Checkers) list := walk.NewListRHelper(callback) overflow := []listREntry{} + listed := 0 cb := func(entry fs.DirEntry) error { mu.Lock() @@ -1629,6 +1638,7 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) ( overflow = append(overflow, listREntry{d.ID(), d.Remote()}) } } + listed++ return list.Add(entry) } @@ -1685,7 +1695,21 @@ func (f *Fs) ListR(ctx context.Context, dir string, callback fs.ListRCallback) ( return err } - return list.Flush() + err = list.Flush() + if err != nil { + return err + } + + // If listing the root of a teamdrive and got no entries, + // double check we have access + if f.isTeamDrive && listed == 0 && f.root == "" && dir == "" { + err = f.teamDriveOK(ctx) + if err != nil { + return err + } + } + + return nil } // itemToDirEntry converts a drive.File to a fs.DirEntry. @@ -2058,9 +2082,30 @@ func (f *Fs) CleanUp(ctx context.Context) error { return nil } +// teamDriveOK checks to see if we can access the team drive +func (f *Fs) teamDriveOK(ctx context.Context) (err error) { + if !f.isTeamDrive { + return nil + } + var td *drive.Drive + err = f.pacer.Call(func() (bool, error) { + td, err = f.svc.Drives.Get(f.opt.TeamDriveID).Fields("name,id,capabilities,createdTime,restrictions").Context(ctx).Do() + return shouldRetry(err) + }) + if err != nil { + return errors.Wrap(err, "failed to get Team/Shared Drive info") + } + fs.Debugf(f, "read info from team drive %q", td.Name) + return err +} + // About gets quota information func (f *Fs) About(ctx context.Context) (*fs.Usage, error) { if f.isTeamDrive { + err := f.teamDriveOK(ctx) + if err != nil { + return nil, err + } // Teamdrives don't appear to have a usage API so just return empty return &fs.Usage{}, nil } From e2773b3b4e018c877077ea15ee07f8a9bcf1fa7d Mon Sep 17 00:00:00 2001 From: Danil Semelenov Date: Fri, 29 Nov 2019 13:38:07 +0300 Subject: [PATCH 17/47] Fix completion with an encrypted config Closes #3767. 
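For illustration, the completion helper now effectively runs the equivalent of

    rclone --ask-password=false listremotes 2> /dev/null

so with an encrypted config (and no RCLONE_CONFIG_PASS in the environment) it
fails silently instead of hanging on a password prompt.
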
--- cmd/help.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmd/help.go b/cmd/help.go index d81fcba7ca56d..10d5766c9d9a4 100644 --- a/cmd/help.go +++ b/cmd/help.go @@ -46,10 +46,11 @@ __rclone_custom_func() { else __rclone_init_completion -n : || return fi + local rclone=(command rclone --ask-password=false) if [[ $cur != *:* ]]; then local ifs=$IFS IFS=$'\n' - local remotes=($(command rclone listremotes)) + local remotes=($("${rclone[@]}" listremotes 2> /dev/null)) IFS=$ifs local remote for remote in "${remotes[@]}"; do @@ -68,7 +69,7 @@ __rclone_custom_func() { fi local ifs=$IFS IFS=$'\n' - local lines=($(rclone lsf "${cur%%:*}:$prefix" 2>/dev/null)) + local lines=($("${rclone[@]}" lsf "${cur%%:*}:$prefix" 2> /dev/null)) IFS=$ifs local line for line in "${lines[@]}"; do From c05bb63f9679e09ecb078e7477f3efd02846c2ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aleksandar=20Jankovi=C4=87?= Date: Mon, 2 Dec 2019 16:15:59 +0100 Subject: [PATCH 18/47] s3: fix DisableChecksum condition --- backend/s3/s3.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/s3/s3.go b/backend/s3/s3.go index c49d9c9b7eee5..991e5bf4b72d9 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -2040,7 +2040,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op // read the md5sum if available for non multpart and if // disable checksum isn't present. var md5sum string - if !multipart || !o.fs.opt.DisableChecksum { + if !multipart && !o.fs.opt.DisableChecksum { hash, err := src.Hash(ctx, hash.MD5) if err == nil && matchMd5.MatchString(hash) { hashBytes, err := hex.DecodeString(hash) From f4746f5064e057d977cd9832c07cf97d403f2212 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 2 Dec 2019 17:00:54 +0000 Subject: [PATCH 19/47] s3: fix multipart copy - fixes #3778 Before this change multipart copies were giving the error Range specified is not valid for source object of size This was due to an off by one error in the range source introduced in 7b1274e29ac51231 "s3: support for multipart copy" --- backend/s3/s3.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/s3/s3.go b/backend/s3/s3.go index 991e5bf4b72d9..a507c9078a616 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -1666,8 +1666,8 @@ func calculateRange(partSize, partIndex, numParts, totalSize int64) string { start := partIndex * partSize var ends string if partIndex == numParts-1 { - if totalSize >= 0 { - ends = strconv.FormatInt(totalSize, 10) + if totalSize >= 1 { + ends = strconv.FormatInt(totalSize-1, 10) } } else { ends = strconv.FormatInt(start+partSize-1, 10) From 0d10640aaaf93291a1e0f942a7b71ba7077cd72e Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 2 Dec 2019 17:14:57 +0000 Subject: [PATCH 20/47] s3: add --s3-copy-cutoff for size to switch to multipart copy Before this change we used the same (relatively low limits) for server side copy as we did for multipart uploads. It doesn't make sense to use the same limits since no data is being downloaded or uploaded for a server side copy. This change introduces a new parameter --s3-copy-cutoff to control when the switch from single to multipart server size copy happens and defaults it to the maximum 5GB. This makes server side copies much more efficient. It also fixes the erroneous error when trying to set the modification time of a file bigger than 5GB. 
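As a usage sketch (bucket and paths are made up), the new option, exposed on
the command line via rclone's usual backend flag naming, can be lowered so
that anything of 1 GiB or more is server side copied in 1 GiB parts:

    rclone copy --s3-copy-cutoff 1G s3:bucket/dir s3:bucket/backup

The same value can be set in the config file as copy_cutoff = 1G under the
relevant s3 remote.
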
See #3778 --- backend/s3/s3.go | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/backend/s3/s3.go b/backend/s3/s3.go index a507c9078a616..5fe8cce298f5a 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -715,6 +715,16 @@ file you can stream upload is 48GB. If you wish to stream upload larger files then you will need to increase chunk_size.`, Default: minChunkSize, Advanced: true, + }, { + Name: "copy_cutoff", + Help: `Cutoff for switching to multipart copy + +Any files larger than this that need to be server side copied will be +copied in chunks of this size. + +The minimum is 0 and the maximum is 5GB.`, + Default: fs.SizeSuffix(maxSizeForCopy), + Advanced: true, }, { Name: "disable_checksum", Help: "Don't store MD5 checksum with object metadata", @@ -809,6 +819,7 @@ type Options struct { SSEKMSKeyID string `config:"sse_kms_key_id"` StorageClass string `config:"storage_class"` UploadCutoff fs.SizeSuffix `config:"upload_cutoff"` + CopyCutoff fs.SizeSuffix `config:"copy_cutoff"` ChunkSize fs.SizeSuffix `config:"chunk_size"` DisableChecksum bool `config:"disable_checksum"` SessionToken string `config:"session_token"` @@ -1653,7 +1664,7 @@ func (f *Fs) copy(ctx context.Context, req *s3.CopyObjectInput, dstBucket, dstPa req.StorageClass = &f.opt.StorageClass } - if srcSize >= int64(f.opt.UploadCutoff) { + if srcSize >= int64(f.opt.CopyCutoff) { return f.copyMultipart(ctx, req, dstBucket, dstPath, srcBucket, srcPath, srcSize) } return f.pacer.Call(func() (bool, error) { @@ -1704,7 +1715,7 @@ func (f *Fs) copyMultipart(ctx context.Context, req *s3.CopyObjectInput, dstBuck } }() - partSize := int64(f.opt.ChunkSize) + partSize := int64(f.opt.CopyCutoff) numParts := (srcSize-1)/partSize + 1 var parts []*s3.CompletedPart @@ -1932,11 +1943,6 @@ func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error { } o.meta[metaMtime] = aws.String(swift.TimeToFloatString(modTime)) - if o.bytes >= maxSizeForCopy { - fs.Debugf(o, "SetModTime is unsupported for objects bigger than %v bytes", fs.SizeSuffix(maxSizeForCopy)) - return nil - } - // Can't update metadata here, so return this error to force a recopy if o.storageClass == "GLACIER" || o.storageClass == "DEEP_ARCHIVE" { return fs.ErrorCantSetModTime From 707e51eac7dcb6f50d8d53ba5621bea7c32abb97 Mon Sep 17 00:00:00 2001 From: David Cole Date: Wed, 4 Dec 2019 04:08:52 -0800 Subject: [PATCH 21/47] docs: correct typo in gui docs --- docs/content/gui.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/gui.md b/docs/content/gui.md index 29f41d4993aed..d5aa0e825430e 100644 --- a/docs/content/gui.md +++ b/docs/content/gui.md @@ -99,7 +99,7 @@ Or instead of htpassword if you just want a single user and password: The GUI is being developed in the: [rclone/rclone-webui-react respository](https://github.com/rclone/rclone-webui-react). -Bug reports and contributions very welcome welcome :-) +Bug reports and contributions are very welcome :-) If you have questions then please ask them on the [rclone forum](https://forum.rclone.org/). 
From 2150cf736213332d0a049e1746710c3bdb7dd78e Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Wed, 4 Dec 2019 12:09:27 +0000 Subject: [PATCH 22/47] =?UTF-8?q?Add=20email=20for=20Aleksandar=20Jankovi?= =?UTF-8?q?=C4=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/content/authors.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/content/authors.md b/docs/content/authors.md index 9ddf34302bddf..212b6c1f8ad6f 100644 --- a/docs/content/authors.md +++ b/docs/content/authors.md @@ -263,7 +263,7 @@ Contributors * garry415 * forgems * Florian Apolloner - * Aleksandar Jankovic + * Aleksandar Janković * Maran * nguyenhuuluan434 * Laura Hausmann From e48145f9591534ca9a40cd12fcb72fa0f2f74fb6 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Wed, 4 Dec 2019 12:14:30 +0000 Subject: [PATCH 23/47] Add David Cole to contributors --- docs/content/authors.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/content/authors.md b/docs/content/authors.md index 212b6c1f8ad6f..3729840c56a30 100644 --- a/docs/content/authors.md +++ b/docs/content/authors.md @@ -315,3 +315,4 @@ Contributors * Maciej Zimnoch * anuar45 * Fernando + * David Cole From cb97239a608eb64d101c324e747918a5d882866f Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Wed, 4 Dec 2019 13:48:03 +0000 Subject: [PATCH 24/47] build: pin actions/checkout to v1 to fix build failure --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 87f23905cedb8..c85b8d77103d3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -102,7 +102,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@master + uses: actions/checkout@v1 with: path: ./src/github.com/${{ github.repository }} @@ -211,7 +211,7 @@ jobs: steps: - name: Checkout - uses: actions/checkout@master + uses: actions/checkout@v1 with: path: ./src/github.com/${{ github.repository }} From 572c1079a59c4cefff99fb49e4291fe25f0880a5 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 2 Dec 2019 16:02:38 +0000 Subject: [PATCH 25/47] fserrors: Make a new NoLowLevelRetry error and don't retry them #3777 --- fs/fserrors/error.go | 52 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/fs/fserrors/error.go b/fs/fserrors/error.go index c1aa31e89849e..aa3e6067f0e98 100644 --- a/fs/fserrors/error.go +++ b/fs/fserrors/error.go @@ -178,6 +178,53 @@ func IsNoRetryError(err error) (isNoRetry bool) { return } +// NoLowLevelRetrier is an optional interface for error as to whether +// the operation should not be retried at a low level. +// +// NoLowLevelRetry errors won't be retried by low level retry loops. +type NoLowLevelRetrier interface { + error + NoLowLevelRetry() bool +} + +// wrappedNoLowLevelRetryError is an error wrapped so it will satisfy the +// NoLowLevelRetrier interface and return true +type wrappedNoLowLevelRetryError struct { + error +} + +// NoLowLevelRetry interface +func (err wrappedNoLowLevelRetryError) NoLowLevelRetry() bool { + return true +} + +// Check interface +var _ NoLowLevelRetrier = wrappedNoLowLevelRetryError{error(nil)} + +// NoLowLevelRetryError makes an error which indicates the sync +// shouldn't be low level retried. 
+func NoLowLevelRetryError(err error) error { + return wrappedNoLowLevelRetryError{err} +} + +// Cause returns the underlying error +func (err wrappedNoLowLevelRetryError) Cause() error { + return err.error +} + +// IsNoLowLevelRetryError returns true if err conforms to the NoLowLevelRetry +// interface and calling the NoLowLevelRetry method returns true. +func IsNoLowLevelRetryError(err error) (isNoLowLevelRetry bool) { + errors.Walk(err, func(err error) bool { + if r, ok := err.(NoLowLevelRetrier); ok { + isNoLowLevelRetry = r.NoLowLevelRetry() + return true + } + return false + }) + return +} + // RetryAfter is an optional interface for error as to whether the // operation should be retried after a given delay // @@ -345,6 +392,11 @@ func ShouldRetry(err error) bool { return false } + // If error has been marked to NoLowLevelRetry then don't retry + if IsNoLowLevelRetryError(err) { + return false + } + // Find root cause if available retriable, err := Cause(err) if retriable { From 684dbe0e9d6a671ed84920748b76bfd2675df889 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 2 Dec 2019 16:03:33 +0000 Subject: [PATCH 26/47] local: make source file being updated errors be NoLowLevelRetry errors #3777 --- backend/local/local.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/local/local.go b/backend/local/local.go index 2b421a4344c2b..12e52b31d2b74 100644 --- a/backend/local/local.go +++ b/backend/local/local.go @@ -820,10 +820,10 @@ func (file *localOpenFile) Read(p []byte) (n int, err error) { return 0, errors.Wrap(err, "can't read status of source file while transferring") } if file.o.size != fi.Size() { - return 0, errors.Errorf("can't copy - source file is being updated (size changed from %d to %d)", file.o.size, fi.Size()) + return 0, fserrors.NoLowLevelRetryError(errors.Errorf("can't copy - source file is being updated (size changed from %d to %d)", file.o.size, fi.Size())) } if !file.o.modTime.Equal(fi.ModTime()) { - return 0, errors.Errorf("can't copy - source file is being updated (mod time changed from %v to %v)", file.o.modTime, fi.ModTime()) + return 0, fserrors.NoLowLevelRetryError(errors.Errorf("can't copy - source file is being updated (mod time changed from %v to %v)", file.o.modTime, fi.ModTime())) } } From 4537d9b5cf2982bbeeec828c15e77ee524e2f414 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 2 Dec 2019 16:04:03 +0000 Subject: [PATCH 27/47] operations: make reopen code error on NoLowLevelRetry errors - fixes #3777 --- fs/operations/reopen.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/operations/reopen.go b/fs/operations/reopen.go index d0af5f5b3b190..879bffee35c43 100644 --- a/fs/operations/reopen.go +++ b/fs/operations/reopen.go @@ -7,6 +7,7 @@ import ( "github.com/pkg/errors" "github.com/rclone/rclone/fs" + "github.com/rclone/rclone/fs/fserrors" ) // reOpen is a wrapper for an object reader which reopens the stream on error @@ -104,7 +105,7 @@ func (h *reOpen) Read(p []byte) (n int, err error) { h.err = err } h.read += int64(n) - if err != nil && err != io.EOF { + if err != nil && err != io.EOF && !fserrors.IsNoLowLevelRetryError(err) { // close underlying stream h.opened = false _ = h.rc.Close() From 50bb9b7bddfd8e2dca92af935082b173a690d1f4 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Sat, 7 Dec 2019 13:26:55 +0000 Subject: [PATCH 28/47] check: fix --one-way recursing more directories than it needs to Before this change rclone traversed all directories in the destination. 
After this change rclone doesn't traverse directories in the destination that don't exist in the source if the `--one-way` flag is set. See: https://forum.rclone.org/t/check-with-one-way-flag-should-not-traverses-all-destination-directories/13263 --- fs/operations/operations.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/operations/operations.go b/fs/operations/operations.go index 9e269b72fe82d..0df993486a57a 100644 --- a/fs/operations/operations.go +++ b/fs/operations/operations.go @@ -721,6 +721,9 @@ func (c *checkMarch) DstOnly(dst fs.DirEntry) (recurse bool) { atomic.AddInt32(&c.srcFilesMissing, 1) case fs.Directory: // Do the same thing to the entire contents of the directory + if c.oneway { + return false + } return true default: panic("Bad object in DirEntries") From 41ba1bba2bf53aa1385beeb50c6afdd0bd7e5aa4 Mon Sep 17 00:00:00 2001 From: Ivan Andreev Date: Wed, 4 Dec 2019 13:43:58 +0300 Subject: [PATCH 29/47] chunker: reduce length of temporary suffix --- backend/chunker/chunker.go | 241 ++++++++++------ backend/chunker/chunker_internal_test.go | 350 ++++++++++++++--------- docs/content/chunker.md | 19 +- 3 files changed, 394 insertions(+), 216 deletions(-) diff --git a/backend/chunker/chunker.go b/backend/chunker/chunker.go index ca73052019054..42c9cf5b47111 100644 --- a/backend/chunker/chunker.go +++ b/backend/chunker/chunker.go @@ -12,11 +12,13 @@ import ( gohash "hash" "io" "io/ioutil" + "math/rand" "path" "regexp" "sort" "strconv" "strings" + "sync" "time" "github.com/pkg/errors" @@ -34,46 +36,57 @@ import ( // and optional metadata object. If it's present, // meta object is named after the original file. // +// The only supported metadata format is simplejson atm. +// It supports only per-file meta objects that are rudimentary, +// used mostly for consistency checks (lazily for performance reasons). +// Other formats can be developed that use an external meta store +// free of these limitations, but this needs some support from +// rclone core (eg. metadata store interfaces). +// // The following types of chunks are supported: // data and control, active and temporary. // Chunk type is identified by matching chunk file name // based on the chunk name format configured by user. // -// Both data and control chunks can be either temporary or -// active (non-temporary). +// Both data and control chunks can be either temporary (aka hidden) +// or active (non-temporary aka normal aka permanent). // An operation creates temporary chunks while it runs. -// By completion it removes temporary and leaves active -// (aka normal aka permanent) chunks. -// -// Temporary (aka hidden) chunks have a special hardcoded suffix -// in addition to the configured name pattern. The suffix comes last -// to prevent name collisions with non-temporary chunks. -// Temporary suffix includes so called transaction number usually -// abbreviated as `xactNo` below, a generic non-negative integer +// By completion it removes temporary and leaves active chunks. +// +// Temporary chunks have a special hardcoded suffix in addition +// to the configured name pattern. +// Temporary suffix includes so called transaction identifier +// (abbreviated as `xactID` below), a generic non-negative base-36 "number" // used by parallel operations to share a composite object. +// Chunker also accepts the longer decimal temporary suffix (obsolete), +// which is transparently converted to the new format. In its maximum +// length of 13 decimals it makes a 7-digit base-36 number. 
// // Chunker can tell data chunks from control chunks by the characters // located in the "hash placeholder" position of configured format. // Data chunks have decimal digits there. -// Control chunks have a short lowercase literal prepended by underscore -// in that position. +// Control chunks have in that position a short lowercase alphanumeric +// string (starting with a letter) prepended by underscore. // // Metadata format v1 does not define any control chunk types, // they are currently ignored aka reserved. // In future they can be used to implement resumable uploads etc. // const ( - ctrlTypeRegStr = `[a-z]{3,9}` - tempChunkFormat = `%s..tmp_%010d` - tempChunkRegStr = `\.\.tmp_([0-9]{10,19})` + ctrlTypeRegStr = `[a-z][a-z0-9]{2,6}` + tempSuffixFormat = `_%04s` + tempSuffixRegStr = `_([0-9a-z]{4,9})` + tempSuffixRegOld = `\.\.tmp_([0-9]{10,13})` ) var ( - ctrlTypeRegexp = regexp.MustCompile(`^` + ctrlTypeRegStr + `$`) + // regular expressions to validate control type and temporary suffix + ctrlTypeRegexp = regexp.MustCompile(`^` + ctrlTypeRegStr + `$`) + tempSuffixRegexp = regexp.MustCompile(`^` + tempSuffixRegStr + `$`) ) // Normally metadata is a small piece of JSON (about 100-300 bytes). -// The size of valid metadata size must never exceed this limit. +// The size of valid metadata must never exceed this limit. // Current maximum provides a reasonable room for future extensions. // // Please refrain from increasing it, this can cause old rclone versions @@ -101,6 +114,9 @@ const revealHidden = false // Prevent memory overflow due to specially crafted chunk name const maxSafeChunkNumber = 10000000 +// Number of attempts to find unique transaction identifier +const maxTransactionProbes = 100 + // standard chunker errors var ( ErrChunkOverflow = errors.New("chunk number overflow") @@ -113,13 +129,6 @@ const ( delFailed = 2 // move, then delete and try again if failed ) -// Note: metadata logic is tightly coupled with chunker code in many -// places, eg. in checks whether a file should have meta object or is -// eligible for chunking. -// If more metadata formats (or versions of a format) are added in future, -// it may be advisable to factor it into a "metadata strategy" interface -// similar to chunkingReader or linearReader below. - // Register with Fs func init() { fs.Register(&fs.RegInfo{ @@ -261,7 +270,7 @@ func NewFs(name, rpath string, m configmap.Mapper) (fs.Fs, error) { // detects a composite file because it finds the first chunk! // (yet can't satisfy fstest.CheckListing, will ignore) if err == nil && !f.useMeta && strings.Contains(rpath, "/") { - firstChunkPath := f.makeChunkName(remotePath, 0, "", -1) + firstChunkPath := f.makeChunkName(remotePath, 0, "", "") _, testErr := baseInfo.NewFs(baseName, firstChunkPath, baseConfig) if testErr == fs.ErrorIsFile { err = testErr @@ -310,12 +319,16 @@ type Fs struct { dataNameFmt string // name format of data chunks ctrlNameFmt string // name format of control chunks nameRegexp *regexp.Regexp // regular expression to match chunk names + xactIDRand *rand.Rand // generator of random transaction identifiers + xactIDMutex sync.Mutex // mutex for the source of randomness opt Options // copy of Options features *fs.Features // optional features dirSort bool // reserved for future, ignored } -// configure must be called only from NewFs or by unit tests +// configure sets up chunker for given name format, meta format and hash type. +// It also seeds the source of random transaction identifiers. 
+// configure must be called only from NewFs or by unit tests. func (f *Fs) configure(nameFormat, metaFormat, hashType string) error { if err := f.setChunkNameFormat(nameFormat); err != nil { return errors.Wrapf(err, "invalid name format '%s'", nameFormat) @@ -326,6 +339,10 @@ func (f *Fs) configure(nameFormat, metaFormat, hashType string) error { if err := f.setHashType(hashType); err != nil { return err } + + randomSeed := time.Now().UnixNano() + f.xactIDRand = rand.New(rand.NewSource(randomSeed)) + return nil } @@ -414,13 +431,13 @@ func (f *Fs) setChunkNameFormat(pattern string) error { } reDataOrCtrl := fmt.Sprintf("(?:(%s)|_(%s))", reDigits, ctrlTypeRegStr) - // this must be non-greedy or else it can eat up temporary suffix + // this must be non-greedy or else it could eat up temporary suffix const mainNameRegStr = "(.+?)" strRegex := regexp.QuoteMeta(pattern) strRegex = reHashes.ReplaceAllLiteralString(strRegex, reDataOrCtrl) strRegex = strings.Replace(strRegex, "\\*", mainNameRegStr, -1) - strRegex = fmt.Sprintf("^%s(?:%s)?$", strRegex, tempChunkRegStr) + strRegex = fmt.Sprintf("^%s(?:%s|%s)?$", strRegex, tempSuffixRegStr, tempSuffixRegOld) f.nameRegexp = regexp.MustCompile(strRegex) // craft printf formats for active data/control chunks @@ -435,34 +452,36 @@ func (f *Fs) setChunkNameFormat(pattern string) error { return nil } -// makeChunkName produces chunk name (or path) for given file. +// makeChunkName produces chunk name (or path) for a given file. // -// mainPath can be name, relative or absolute path of main file. +// filePath can be name, relative or absolute path of main file. // // chunkNo must be a zero based index of data chunk. // Negative chunkNo eg. -1 indicates a control chunk. // ctrlType is type of control chunk (must be valid). // ctrlType must be "" for data chunks. // -// xactNo is a transaction number. -// Negative xactNo eg. -1 indicates an active chunk, -// otherwise produce temporary chunk name. +// xactID is a transaction identifier. Empty xactID denotes active chunk, +// otherwise temporary chunk name is produced. // -func (f *Fs) makeChunkName(mainPath string, chunkNo int, ctrlType string, xactNo int64) string { - dir, mainName := path.Split(mainPath) - var name string +func (f *Fs) makeChunkName(filePath string, chunkNo int, ctrlType, xactID string) string { + dir, parentName := path.Split(filePath) + var name, tempSuffix string switch { case chunkNo >= 0 && ctrlType == "": - name = fmt.Sprintf(f.dataNameFmt, mainName, chunkNo+f.opt.StartFrom) + name = fmt.Sprintf(f.dataNameFmt, parentName, chunkNo+f.opt.StartFrom) case chunkNo < 0 && ctrlTypeRegexp.MatchString(ctrlType): - name = fmt.Sprintf(f.ctrlNameFmt, mainName, ctrlType) + name = fmt.Sprintf(f.ctrlNameFmt, parentName, ctrlType) default: panic("makeChunkName: invalid argument") // must not produce something we can't consume } - if xactNo >= 0 { - name = fmt.Sprintf(tempChunkFormat, name, xactNo) + if xactID != "" { + tempSuffix = fmt.Sprintf(tempSuffixFormat, xactID) + if !tempSuffixRegexp.MatchString(tempSuffix) { + panic("makeChunkName: invalid argument") + } } - return dir + name + return dir + name + tempSuffix } // parseChunkName checks whether given file path belongs to @@ -470,20 +489,21 @@ func (f *Fs) makeChunkName(mainPath string, chunkNo int, ctrlType string, xactNo // // filePath can be name, relative or absolute path of a file. // -// Returned mainPath is a non-empty string if valid chunk name -// is detected or "" if it's not a chunk. 
+// Returned parentPath is path of the composite file owning the chunk. +// It's a non-empty string if valid chunk name is detected +// or "" if it's not a chunk. // Other returned values depend on detected chunk type: // data or control, active or temporary: // // data chunk - the returned chunkNo is non-negative and ctrlType is "" -// control chunk - the chunkNo is -1 and ctrlType is non-empty string -// active chunk - the returned xactNo is -1 -// temporary chunk - the xactNo is non-negative integer -func (f *Fs) parseChunkName(filePath string) (mainPath string, chunkNo int, ctrlType string, xactNo int64) { +// control chunk - the chunkNo is -1 and ctrlType is a non-empty string +// active chunk - the returned xactID is "" +// temporary chunk - the xactID is a non-empty string +func (f *Fs) parseChunkName(filePath string) (parentPath string, chunkNo int, ctrlType, xactID string) { dir, name := path.Split(filePath) match := f.nameRegexp.FindStringSubmatch(name) if match == nil || match[1] == "" { - return "", -1, "", -1 + return "", -1, "", "" } var err error @@ -494,19 +514,26 @@ func (f *Fs) parseChunkName(filePath string) (mainPath string, chunkNo int, ctrl } if chunkNo -= f.opt.StartFrom; chunkNo < 0 { fs.Infof(f, "invalid data chunk number in file %q", name) - return "", -1, "", -1 + return "", -1, "", "" } } - xactNo = -1 if match[4] != "" { - if xactNo, err = strconv.ParseInt(match[4], 10, 64); err != nil || xactNo < 0 { - fs.Infof(f, "invalid transaction number in file %q", name) - return "", -1, "", -1 + xactID = match[4] + } + if match[5] != "" { + // old-style temporary suffix + number, err := strconv.ParseInt(match[5], 10, 64) + if err != nil || number < 0 { + fs.Infof(f, "invalid old-style transaction number in file %q", name) + return "", -1, "", "" } + // convert old-style transaction number to base-36 transaction ID + xactID = fmt.Sprintf(tempSuffixFormat, strconv.FormatInt(number, 36)) + xactID = xactID[1:] // strip leading underscore } - mainPath = dir + match[1] + parentPath = dir + match[1] ctrlType = match[3] return } @@ -514,17 +541,74 @@ func (f *Fs) parseChunkName(filePath string) (mainPath string, chunkNo int, ctrl // forbidChunk prints error message or raises error if file is chunk. // First argument sets log prefix, use `false` to suppress message. func (f *Fs) forbidChunk(o interface{}, filePath string) error { - if mainPath, _, _, _ := f.parseChunkName(filePath); mainPath != "" { + if parentPath, _, _, _ := f.parseChunkName(filePath); parentPath != "" { if f.opt.FailHard { - return fmt.Errorf("chunk overlap with %q", mainPath) + return fmt.Errorf("chunk overlap with %q", parentPath) } if boolVal, isBool := o.(bool); !isBool || boolVal { - fs.Errorf(o, "chunk overlap with %q", mainPath) + fs.Errorf(o, "chunk overlap with %q", parentPath) } } return nil } +// newXactID produces a sufficiently random transaction identifier. +// +// The temporary suffix mask allows identifiers consisting of 4-9 +// base-36 digits (ie. digits 0-9 or lowercase letters a-z). +// The identifiers must be unique between transactions running on +// the single file in parallel. +// +// Currently the function produces 6-character identifiers. +// Together with underscore this makes a 7-character temporary suffix. +// +// The first 4 characters isolate groups of transactions by time intervals. +// The maximum length of interval is base-36 "zzzz" ie. 1,679,615 seconds. 
+// The function rather takes a maximum prime closest to this number +// (see https://primes.utm.edu) as the interval length to better safeguard +// against repeating pseudo-random sequences in cases when rclone is +// invoked from a periodic scheduler like unix cron. +// Thus, the interval is slightly more than 19 days 10 hours 33 minutes. +// +// The remaining 2 base-36 digits (in the range from 0 to 1295 inclusive) +// are taken from the local random source. +// This provides about 0.1% collision probability for two parallel +// operations started at the same second and working on the same file. +// +// Non-empty filePath argument enables probing for existing temporary chunk +// to further eliminate collisions. +func (f *Fs) newXactID(ctx context.Context, filePath string) (xactID string, err error) { + const closestPrimeZzzzSeconds = 1679609 + const maxTwoBase36Digits = 1295 + + unixSec := time.Now().Unix() + if unixSec < 0 { + unixSec = -unixSec // unlikely but the number must be positive + } + circleSec := unixSec % closestPrimeZzzzSeconds + first4chars := strconv.FormatInt(circleSec, 36) + + for tries := 0; tries < maxTransactionProbes; tries++ { + f.xactIDMutex.Lock() + randomness := f.xactIDRand.Int63n(maxTwoBase36Digits + 1) + f.xactIDMutex.Unlock() + + last2chars := strconv.FormatInt(randomness, 36) + xactID = fmt.Sprintf("%04s%02s", first4chars, last2chars) + + if filePath == "" { + return + } + probeChunk := f.makeChunkName(filePath, 0, "", xactID) + _, probeErr := f.base.NewObject(ctx, probeChunk) + if probeErr != nil { + return + } + } + + return "", fmt.Errorf("can't setup transaction for %s", filePath) +} + // List the objects and directories in dir into entries. // The entries can be returned in any order but should be // for a complete directory. @@ -602,8 +686,8 @@ func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirP switch entry := dirOrObject.(type) { case fs.Object: remote := entry.Remote() - if mainRemote, chunkNo, ctrlType, xactNo := f.parseChunkName(remote); mainRemote != "" { - if xactNo != -1 { + if mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(remote); mainRemote != "" { + if xactID != "" { if revealHidden { fs.Infof(f, "ignore temporary chunk %q", remote) } @@ -686,7 +770,7 @@ func (f *Fs) processEntries(ctx context.Context, origEntries fs.DirEntries, dirP // // Please note that every NewObject invocation will scan the whole directory. // Using here something like fs.DirCache might improve performance -// (but will make logic more complex, though). +// (yet making the logic more complex). 
// // Note that chunker prefers analyzing file names rather than reading // the content of meta object assuming that directory scans are fast @@ -752,8 +836,8 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { if !strings.Contains(entryRemote, remote) { continue // bypass regexp to save cpu } - mainRemote, chunkNo, ctrlType, xactNo := f.parseChunkName(entryRemote) - if mainRemote == "" || mainRemote != remote || ctrlType != "" || xactNo != -1 { + mainRemote, chunkNo, ctrlType, xactID := f.parseChunkName(entryRemote) + if mainRemote == "" || mainRemote != remote || ctrlType != "" || xactID != "" { continue // skip non-conforming, temporary and control chunks } //fs.Debugf(f, "%q belongs to %q as chunk %d", entryRemote, mainRemote, chunkNo) @@ -786,7 +870,7 @@ func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { // This is either a composite object with metadata or a non-chunked // file without metadata. Validate it and update the total data size. // As an optimization, skip metadata reading here - we will call - // readMetadata lazily when needed. + // readMetadata lazily when needed (reading can be expensive). if err := o.validate(); err != nil { return nil, err } @@ -843,14 +927,11 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st } }() - // Use system timer as a trivial source of transaction numbers, - // don't try hard to safeguard against chunk collisions between - // parallel transactions. - xactNo := time.Now().Unix() - if xactNo < 0 { - xactNo = -xactNo // unlikely but transaction number must be positive - } baseRemote := remote + xactID, errXact := f.newXactID(ctx, baseRemote) + if errXact != nil { + return nil, errXact + } // Transfer chunks data for c.chunkNo = 0; !c.done; c.chunkNo++ { @@ -858,7 +939,7 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st return nil, ErrChunkOverflow } - tempRemote := f.makeChunkName(baseRemote, c.chunkNo, "", xactNo) + tempRemote := f.makeChunkName(baseRemote, c.chunkNo, "", xactID) size := c.sizeLeft if size > c.chunkSize { size = c.chunkSize @@ -962,7 +1043,7 @@ func (f *Fs) put(ctx context.Context, in io.Reader, src fs.ObjectInfo, remote st // Rename data chunks from temporary to final names for chunkNo, chunk := range c.chunks { - chunkRemote := f.makeChunkName(baseRemote, chunkNo, "", -1) + chunkRemote := f.makeChunkName(baseRemote, chunkNo, "", "") chunkMoved, errMove := f.baseMove(ctx, chunk, chunkRemote, delFailed) if errMove != nil { return nil, errMove @@ -1221,11 +1302,6 @@ func (f *Fs) PutUnchecked(ctx context.Context, in io.Reader, src fs.ObjectInfo, return f.newObject("", o, nil), nil } -// Precision returns the precision of this Fs -func (f *Fs) Precision() time.Duration { - return f.base.Precision() -} - // Hashes returns the supported hash sets. // Chunker advertises a hash type if and only if it can be calculated // for files of any size, non-chunked or composite. 
@@ -1613,8 +1689,8 @@ func (f *Fs) ChangeNotify(ctx context.Context, notifyFunc func(string, fs.EntryT wrappedNotifyFunc := func(path string, entryType fs.EntryType) { //fs.Debugf(f, "ChangeNotify: path %q entryType %d", path, entryType) if entryType == fs.EntryObject { - mainPath, _, _, xactNo := f.parseChunkName(path) - if mainPath != "" && xactNo == -1 { + mainPath, _, _, xactID := f.parseChunkName(path) + if mainPath != "" && xactID == "" { path = mainPath } } @@ -2063,7 +2139,7 @@ type metaSimpleJSON struct { // Current implementation creates metadata in three cases: // - for files larger than chunk size // - if file contents can be mistaken as meta object -// - if consistent hashing is on but wrapped remote can't provide given hash +// - if consistent hashing is On but wrapped remote can't provide given hash // func marshalSimpleJSON(ctx context.Context, size int64, nChunks int, md5, sha1 string) ([]byte, error) { version := metadataVersion @@ -2177,6 +2253,11 @@ func (f *Fs) String() string { return fmt.Sprintf("Chunked '%s:%s'", f.name, f.root) } +// Precision returns the precision of this Fs +func (f *Fs) Precision() time.Duration { + return f.base.Precision() +} + // Check the interfaces are satisfied var ( _ fs.Fs = (*Fs)(nil) diff --git a/backend/chunker/chunker_internal_test.go b/backend/chunker/chunker_internal_test.go index 372fa7bc64f08..6ba6890d211d0 100644 --- a/backend/chunker/chunker_internal_test.go +++ b/backend/chunker/chunker_internal_test.go @@ -64,35 +64,40 @@ func testChunkNameFormat(t *testing.T, f *Fs) { assert.Error(t, err) } - assertMakeName := func(wantChunkName, mainName string, chunkNo int, ctrlType string, xactNo int64) { - gotChunkName := f.makeChunkName(mainName, chunkNo, ctrlType, xactNo) - assert.Equal(t, wantChunkName, gotChunkName) + assertMakeName := func(wantChunkName, mainName string, chunkNo int, ctrlType, xactID string) { + gotChunkName := "" + assert.NotPanics(t, func() { + gotChunkName = f.makeChunkName(mainName, chunkNo, ctrlType, xactID) + }, "makeChunkName(%q,%d,%q,%q) must not panic", mainName, chunkNo, ctrlType, xactID) + if gotChunkName != "" { + assert.Equal(t, wantChunkName, gotChunkName) + } } - assertMakeNamePanics := func(mainName string, chunkNo int, ctrlType string, xactNo int64) { + assertMakeNamePanics := func(mainName string, chunkNo int, ctrlType, xactID string) { assert.Panics(t, func() { - _ = f.makeChunkName(mainName, chunkNo, ctrlType, xactNo) - }, "makeChunkName(%q,%d,%q,%d) should panic", mainName, chunkNo, ctrlType, xactNo) + _ = f.makeChunkName(mainName, chunkNo, ctrlType, xactID) + }, "makeChunkName(%q,%d,%q,%q) should panic", mainName, chunkNo, ctrlType, xactID) } - assertParseName := func(fileName, wantMainName string, wantChunkNo int, wantCtrlType string, wantXactNo int64) { - gotMainName, gotChunkNo, gotCtrlType, gotXactNo := f.parseChunkName(fileName) + assertParseName := func(fileName, wantMainName string, wantChunkNo int, wantCtrlType, wantXactID string) { + gotMainName, gotChunkNo, gotCtrlType, gotXactID := f.parseChunkName(fileName) assert.Equal(t, wantMainName, gotMainName) assert.Equal(t, wantChunkNo, gotChunkNo) assert.Equal(t, wantCtrlType, gotCtrlType) - assert.Equal(t, wantXactNo, gotXactNo) + assert.Equal(t, wantXactID, gotXactID) } const newFormatSupported = false // support for patterns not starting with base name (*) // valid formats - assertFormat(`*.rclone_chunk.###`, `%s.rclone_chunk.%03d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]{3,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) 
- assertFormat(`*.rclone_chunk.#`, `%s.rclone_chunk.%d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) - assertFormat(`*_chunk_#####`, `%s_chunk_%05d`, `%s_chunk__%s`, `^(.+?)_chunk_(?:([0-9]{5,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) - assertFormat(`*-chunk-#`, `%s-chunk-%d`, `%s-chunk-_%s`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) - assertFormat(`*-chunk-#-%^$()[]{}.+-!?:\`, `%s-chunk-%d-%%^$()[]{}.+-!?:\`, `%s-chunk-_%s-%%^$()[]{}.+-!?:\`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z]{3,9}))-%\^\$\(\)\[\]\{\}\.\+-!\?:\\(?:\.\.tmp_([0-9]{10,19}))?$`) + assertFormat(`*.rclone_chunk.###`, `%s.rclone_chunk.%03d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]{3,})|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) + assertFormat(`*.rclone_chunk.#`, `%s.rclone_chunk.%d`, `%s.rclone_chunk._%s`, `^(.+?)\.rclone_chunk\.(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) + assertFormat(`*_chunk_#####`, `%s_chunk_%05d`, `%s_chunk__%s`, `^(.+?)_chunk_(?:([0-9]{5,})|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) + assertFormat(`*-chunk-#`, `%s-chunk-%d`, `%s-chunk-_%s`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) + assertFormat(`*-chunk-#-%^$()[]{}.+-!?:\`, `%s-chunk-%d-%%^$()[]{}.+-!?:\`, `%s-chunk-_%s-%%^$()[]{}.+-!?:\`, `^(.+?)-chunk-(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))-%\^\$\(\)\[\]\{\}\.\+-!\?:\\(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) if newFormatSupported { - assertFormat(`_*-chunk-##,`, `_%s-chunk-%02d,`, `_%s-chunk-_%s,`, `^_(.+?)-chunk-(?:([0-9]{2,})|_([a-z]{3,9})),(?:\.\.tmp_([0-9]{10,19}))?$`) + assertFormat(`_*-chunk-##,`, `_%s-chunk-%02d,`, `_%s-chunk-_%s,`, `^_(.+?)-chunk-(?:([0-9]{2,})|_([a-z][a-z0-9]{2,6})),(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) } // invalid formats @@ -111,142 +116,223 @@ func testChunkNameFormat(t *testing.T, f *Fs) { // quick tests if newFormatSupported { - assertFormat(`part_*_#`, `part_%s_%d`, `part_%s__%s`, `^part_(.+?)_(?:([0-9]+)|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) + assertFormat(`part_*_#`, `part_%s_%d`, `part_%s__%s`, `^part_(.+?)_(?:([0-9]+)|_([a-z][a-z0-9]{2,6}))(?:_([0-9][0-9a-z]{3,8})\.\.tmp_([0-9]{10,13}))?$`) f.opt.StartFrom = 1 - assertMakeName(`part_fish_1`, "fish", 0, "", -1) - assertParseName(`part_fish_43`, "fish", 42, "", -1) - assertMakeName(`part_fish_3..tmp_0000000004`, "fish", 2, "", 4) - assertParseName(`part_fish_4..tmp_0000000005`, "fish", 3, "", 5) - assertMakeName(`part_fish__locks`, "fish", -2, "locks", -3) - assertParseName(`part_fish__locks`, "fish", -1, "locks", -1) - assertMakeName(`part_fish__blockinfo..tmp_1234567890123456789`, "fish", -3, "blockinfo", 1234567890123456789) - assertParseName(`part_fish__blockinfo..tmp_1234567890123456789`, "fish", -1, "blockinfo", 1234567890123456789) + assertMakeName(`part_fish_1`, "fish", 0, "", "") + assertParseName(`part_fish_43`, "fish", 42, "", "") + assertMakeName(`part_fish__locks`, "fish", -2, "locks", "") + assertParseName(`part_fish__locks`, "fish", -1, "locks", "") + assertMakeName(`part_fish__x2y`, "fish", -2, "x2y", "") + assertParseName(`part_fish__x2y`, "fish", -1, "x2y", "") + assertMakeName(`part_fish_3_0004`, "fish", 2, "", "4") + assertParseName(`part_fish_4_0005`, "fish", 3, "", "0005") + assertMakeName(`part_fish__blkinfo_jj5fvo3wr`, "fish", -3, "blkinfo", "jj5fvo3wr") + assertParseName(`part_fish__blkinfo_zz9fvo3wr`, "fish", -1, "blkinfo", 
"zz9fvo3wr") + + // old-style temporary suffix (parse only) + assertParseName(`part_fish_4..tmp_0000000011`, "fish", 3, "", "000b") + assertParseName(`part_fish__blkinfo_jj5fvo3wr`, "fish", -1, "blkinfo", "jj5fvo3wr") } // prepare format for long tests - assertFormat(`*.chunk.###`, `%s.chunk.%03d`, `%s.chunk._%s`, `^(.+?)\.chunk\.(?:([0-9]{3,})|_([a-z]{3,9}))(?:\.\.tmp_([0-9]{10,19}))?$`) + assertFormat(`*.chunk.###`, `%s.chunk.%03d`, `%s.chunk._%s`, `^(.+?)\.chunk\.(?:([0-9]{3,})|_([a-z][a-z0-9]{2,6}))(?:_([0-9a-z]{4,9})|\.\.tmp_([0-9]{10,13}))?$`) f.opt.StartFrom = 2 // valid data chunks - assertMakeName(`fish.chunk.003`, "fish", 1, "", -1) - assertMakeName(`fish.chunk.011..tmp_0000054321`, "fish", 9, "", 54321) - assertMakeName(`fish.chunk.011..tmp_1234567890`, "fish", 9, "", 1234567890) - assertMakeName(`fish.chunk.1916..tmp_123456789012345`, "fish", 1914, "", 123456789012345) - - assertParseName(`fish.chunk.003`, "fish", 1, "", -1) - assertParseName(`fish.chunk.004..tmp_0000000021`, "fish", 2, "", 21) - assertParseName(`fish.chunk.021`, "fish", 19, "", -1) - assertParseName(`fish.chunk.323..tmp_1234567890123456789`, "fish", 321, "", 1234567890123456789) + assertMakeName(`fish.chunk.003`, "fish", 1, "", "") + assertParseName(`fish.chunk.003`, "fish", 1, "", "") + assertMakeName(`fish.chunk.021`, "fish", 19, "", "") + assertParseName(`fish.chunk.021`, "fish", 19, "", "") + + // valid temporary data chunks + assertMakeName(`fish.chunk.011_4321`, "fish", 9, "", "4321") + assertParseName(`fish.chunk.011_4321`, "fish", 9, "", "4321") + assertMakeName(`fish.chunk.011_00bc`, "fish", 9, "", "00bc") + assertParseName(`fish.chunk.011_00bc`, "fish", 9, "", "00bc") + assertMakeName(`fish.chunk.1916_5jjfvo3wr`, "fish", 1914, "", "5jjfvo3wr") + assertParseName(`fish.chunk.1916_5jjfvo3wr`, "fish", 1914, "", "5jjfvo3wr") + assertMakeName(`fish.chunk.1917_zz9fvo3wr`, "fish", 1915, "", "zz9fvo3wr") + assertParseName(`fish.chunk.1917_zz9fvo3wr`, "fish", 1915, "", "zz9fvo3wr") + + // valid temporary data chunks (old temporary suffix, only parse) + assertParseName(`fish.chunk.004..tmp_0000000047`, "fish", 2, "", "001b") + assertParseName(`fish.chunk.323..tmp_9994567890123`, "fish", 321, "", "3jjfvo3wr") // parsing invalid data chunk names - assertParseName(`fish.chunk.3`, "", -1, "", -1) - assertParseName(`fish.chunk.001`, "", -1, "", -1) - assertParseName(`fish.chunk.21`, "", -1, "", -1) - assertParseName(`fish.chunk.-21`, "", -1, "", -1) - - assertParseName(`fish.chunk.004.tmp_0000000021`, "", -1, "", -1) - assertParseName(`fish.chunk.003..tmp_123456789`, "", -1, "", -1) - assertParseName(`fish.chunk.003..tmp_012345678901234567890123456789`, "", -1, "", -1) - assertParseName(`fish.chunk.003..tmp_-1`, "", -1, "", -1) + assertParseName(`fish.chunk.3`, "", -1, "", "") + assertParseName(`fish.chunk.001`, "", -1, "", "") + assertParseName(`fish.chunk.21`, "", -1, "", "") + assertParseName(`fish.chunk.-21`, "", -1, "", "") + + assertParseName(`fish.chunk.004abcd`, "", -1, "", "") // missing underscore delimiter + assertParseName(`fish.chunk.004__1234`, "", -1, "", "") // extra underscore delimiter + assertParseName(`fish.chunk.004_123`, "", -1, "", "") // too short temporary suffix + assertParseName(`fish.chunk.004_1234567890`, "", -1, "", "") // too long temporary suffix + assertParseName(`fish.chunk.004_-1234`, "", -1, "", "") // temporary suffix must be positive + assertParseName(`fish.chunk.004_123E`, "", -1, "", "") // uppercase not allowed + assertParseName(`fish.chunk.004_12.3`, "", -1, "", "") // 
punctuation not allowed + + // parsing invalid data chunk names (old temporary suffix) + assertParseName(`fish.chunk.004.tmp_0000000021`, "", -1, "", "") + assertParseName(`fish.chunk.003..tmp_123456789`, "", -1, "", "") + assertParseName(`fish.chunk.003..tmp_012345678901234567890123456789`, "", -1, "", "") + assertParseName(`fish.chunk.323..tmp_12345678901234`, "", -1, "", "") + assertParseName(`fish.chunk.003..tmp_-1`, "", -1, "", "") // valid control chunks - assertMakeName(`fish.chunk._info`, "fish", -1, "info", -1) - assertMakeName(`fish.chunk._locks`, "fish", -2, "locks", -1) - assertMakeName(`fish.chunk._blockinfo`, "fish", -3, "blockinfo", -1) + assertMakeName(`fish.chunk._info`, "fish", -1, "info", "") + assertMakeName(`fish.chunk._locks`, "fish", -2, "locks", "") + assertMakeName(`fish.chunk._blkinfo`, "fish", -3, "blkinfo", "") + assertMakeName(`fish.chunk._x2y`, "fish", -4, "x2y", "") - assertParseName(`fish.chunk._info`, "fish", -1, "info", -1) - assertParseName(`fish.chunk._locks`, "fish", -1, "locks", -1) - assertParseName(`fish.chunk._blockinfo`, "fish", -1, "blockinfo", -1) + assertParseName(`fish.chunk._info`, "fish", -1, "info", "") + assertParseName(`fish.chunk._locks`, "fish", -1, "locks", "") + assertParseName(`fish.chunk._blkinfo`, "fish", -1, "blkinfo", "") + assertParseName(`fish.chunk._x2y`, "fish", -1, "x2y", "") // valid temporary control chunks - assertMakeName(`fish.chunk._info..tmp_0000000021`, "fish", -1, "info", 21) - assertMakeName(`fish.chunk._locks..tmp_0000054321`, "fish", -2, "locks", 54321) - assertMakeName(`fish.chunk._uploads..tmp_0000000000`, "fish", -3, "uploads", 0) - assertMakeName(`fish.chunk._blockinfo..tmp_1234567890123456789`, "fish", -4, "blockinfo", 1234567890123456789) - - assertParseName(`fish.chunk._info..tmp_0000000021`, "fish", -1, "info", 21) - assertParseName(`fish.chunk._locks..tmp_0000054321`, "fish", -1, "locks", 54321) - assertParseName(`fish.chunk._uploads..tmp_0000000000`, "fish", -1, "uploads", 0) - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789`, "fish", -1, "blockinfo", 1234567890123456789) + assertMakeName(`fish.chunk._info_0001`, "fish", -1, "info", "1") + assertMakeName(`fish.chunk._locks_4321`, "fish", -2, "locks", "4321") + assertMakeName(`fish.chunk._uploads_abcd`, "fish", -3, "uploads", "abcd") + assertMakeName(`fish.chunk._blkinfo_xyzabcdef`, "fish", -4, "blkinfo", "xyzabcdef") + assertMakeName(`fish.chunk._x2y_1aaa`, "fish", -5, "x2y", "1aaa") + + assertParseName(`fish.chunk._info_0001`, "fish", -1, "info", "0001") + assertParseName(`fish.chunk._locks_4321`, "fish", -1, "locks", "4321") + assertParseName(`fish.chunk._uploads_9abc`, "fish", -1, "uploads", "9abc") + assertParseName(`fish.chunk._blkinfo_xyzabcdef`, "fish", -1, "blkinfo", "xyzabcdef") + assertParseName(`fish.chunk._x2y_1aaa`, "fish", -1, "x2y", "1aaa") + + // valid temporary control chunks (old temporary suffix, parse only) + assertParseName(`fish.chunk._info..tmp_0000000047`, "fish", -1, "info", "001b") + assertParseName(`fish.chunk._locks..tmp_0000054321`, "fish", -1, "locks", "15wx") + assertParseName(`fish.chunk._uploads..tmp_0000000000`, "fish", -1, "uploads", "0000") + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123`, "fish", -1, "blkinfo", "3jjfvo3wr") + assertParseName(`fish.chunk._x2y..tmp_0000000000`, "fish", -1, "x2y", "0000") // parsing invalid control chunk names - assertParseName(`fish.chunk.info`, "", -1, "", -1) - assertParseName(`fish.chunk.locks`, "", -1, "", -1) - assertParseName(`fish.chunk.uploads`, "", -1, 
"", -1) - assertParseName(`fish.chunk.blockinfo`, "", -1, "", -1) - - assertParseName(`fish.chunk._os`, "", -1, "", -1) - assertParseName(`fish.chunk._futuredata`, "", -1, "", -1) - assertParseName(`fish.chunk._me_ta`, "", -1, "", -1) - assertParseName(`fish.chunk._in-fo`, "", -1, "", -1) - assertParseName(`fish.chunk._.bin`, "", -1, "", -1) - - assertParseName(`fish.chunk._locks..tmp_123456789`, "", -1, "", -1) - assertParseName(`fish.chunk._meta..tmp_-1`, "", -1, "", -1) - assertParseName(`fish.chunk._blockinfo..tmp_012345678901234567890123456789`, "", -1, "", -1) + assertParseName(`fish.chunk.metadata`, "", -1, "", "") // must be prepended by underscore + assertParseName(`fish.chunk.info`, "", -1, "", "") + assertParseName(`fish.chunk.locks`, "", -1, "", "") + assertParseName(`fish.chunk.uploads`, "", -1, "", "") + + assertParseName(`fish.chunk._os`, "", -1, "", "") // too short + assertParseName(`fish.chunk._metadata`, "", -1, "", "") // too long + assertParseName(`fish.chunk._blockinfo`, "", -1, "", "") // way too long + assertParseName(`fish.chunk._4me`, "", -1, "", "") // cannot start with digit + assertParseName(`fish.chunk._567`, "", -1, "", "") // cannot be all digits + assertParseName(`fish.chunk._me_ta`, "", -1, "", "") // punctuation not allowed + assertParseName(`fish.chunk._in-fo`, "", -1, "", "") + assertParseName(`fish.chunk._.bin`, "", -1, "", "") + assertParseName(`fish.chunk._.2xy`, "", -1, "", "") + + // parsing invalid temporary control chunks + assertParseName(`fish.chunk._blkinfo1234`, "", -1, "", "") // missing underscore delimiter + assertParseName(`fish.chunk._info__1234`, "", -1, "", "") // extra underscore delimiter + assertParseName(`fish.chunk._info_123`, "", -1, "", "") // too short temporary suffix + assertParseName(`fish.chunk._info_1234567890`, "", -1, "", "") // too long temporary suffix + assertParseName(`fish.chunk._info_-1234`, "", -1, "", "") // temporary suffix must be positive + assertParseName(`fish.chunk._info_123E`, "", -1, "", "") // uppercase not allowed + assertParseName(`fish.chunk._info_12.3`, "", -1, "", "") // punctuation not allowed + + assertParseName(`fish.chunk._locks..tmp_123456789`, "", -1, "", "") + assertParseName(`fish.chunk._meta..tmp_-1`, "", -1, "", "") + assertParseName(`fish.chunk._blockinfo..tmp_012345678901234567890123456789`, "", -1, "", "") // short control chunk names: 3 letters ok, 1-2 letters not allowed - assertMakeName(`fish.chunk._ext`, "fish", -1, "ext", -1) - assertMakeName(`fish.chunk._ext..tmp_0000000021`, "fish", -1, "ext", 21) - assertParseName(`fish.chunk._int`, "fish", -1, "int", -1) - assertParseName(`fish.chunk._int..tmp_0000000021`, "fish", -1, "int", 21) - assertMakeNamePanics("fish", -1, "in", -1) - assertMakeNamePanics("fish", -1, "up", 4) - assertMakeNamePanics("fish", -1, "x", -1) - assertMakeNamePanics("fish", -1, "c", 4) + assertMakeName(`fish.chunk._ext`, "fish", -1, "ext", "") + assertParseName(`fish.chunk._int`, "fish", -1, "int", "") + + assertMakeNamePanics("fish", -1, "in", "") + assertMakeNamePanics("fish", -1, "up", "4") + assertMakeNamePanics("fish", -1, "x", "") + assertMakeNamePanics("fish", -1, "c", "1z") + + assertMakeName(`fish.chunk._ext_0000`, "fish", -1, "ext", "0") + assertMakeName(`fish.chunk._ext_0026`, "fish", -1, "ext", "26") + assertMakeName(`fish.chunk._int_0abc`, "fish", -1, "int", "abc") + assertMakeName(`fish.chunk._int_9xyz`, "fish", -1, "int", "9xyz") + assertMakeName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr") + 
assertMakeName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr") + + assertParseName(`fish.chunk._ext_0000`, "fish", -1, "ext", "0000") + assertParseName(`fish.chunk._ext_0026`, "fish", -1, "ext", "0026") + assertParseName(`fish.chunk._int_0abc`, "fish", -1, "int", "0abc") + assertParseName(`fish.chunk._int_9xyz`, "fish", -1, "int", "9xyz") + assertParseName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr") + assertParseName(`fish.chunk._out_jj5fvo3wr`, "fish", -1, "out", "jj5fvo3wr") // base file name can sometimes look like a valid chunk name - assertParseName(`fish.chunk.003.chunk.004`, "fish.chunk.003", 2, "", -1) - assertParseName(`fish.chunk.003.chunk.005..tmp_0000000021`, "fish.chunk.003", 3, "", 21) - assertParseName(`fish.chunk.003.chunk._info`, "fish.chunk.003", -1, "info", -1) - assertParseName(`fish.chunk.003.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk.003", -1, "blockinfo", 1234567890123456789) - assertParseName(`fish.chunk.003.chunk._Meta`, "", -1, "", -1) - assertParseName(`fish.chunk.003.chunk._x..tmp_0000054321`, "", -1, "", -1) - - assertParseName(`fish.chunk.004..tmp_0000000021.chunk.004`, "fish.chunk.004..tmp_0000000021", 2, "", -1) - assertParseName(`fish.chunk.004..tmp_0000000021.chunk.005..tmp_0000000021`, "fish.chunk.004..tmp_0000000021", 3, "", 21) - assertParseName(`fish.chunk.004..tmp_0000000021.chunk._info`, "fish.chunk.004..tmp_0000000021", -1, "info", -1) - assertParseName(`fish.chunk.004..tmp_0000000021.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk.004..tmp_0000000021", -1, "blockinfo", 1234567890123456789) - assertParseName(`fish.chunk.004..tmp_0000000021.chunk._Meta`, "", -1, "", -1) - assertParseName(`fish.chunk.004..tmp_0000000021.chunk._x..tmp_0000054321`, "", -1, "", -1) - - assertParseName(`fish.chunk._info.chunk.004`, "fish.chunk._info", 2, "", -1) - assertParseName(`fish.chunk._info.chunk.005..tmp_0000000021`, "fish.chunk._info", 3, "", 21) - assertParseName(`fish.chunk._info.chunk._info`, "fish.chunk._info", -1, "info", -1) - assertParseName(`fish.chunk._info.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk._info", -1, "blockinfo", 1234567890123456789) - assertParseName(`fish.chunk._info.chunk._info.chunk._Meta`, "", -1, "", -1) - assertParseName(`fish.chunk._info.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", -1) - - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk.004`, "fish.chunk._blockinfo..tmp_1234567890123456789", 2, "", -1) - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk.005..tmp_0000000021`, "fish.chunk._blockinfo..tmp_1234567890123456789", 3, "", 21) - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info`, "fish.chunk._blockinfo..tmp_1234567890123456789", -1, "info", -1) - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._blockinfo..tmp_1234567890123456789`, "fish.chunk._blockinfo..tmp_1234567890123456789", -1, "blockinfo", 1234567890123456789) - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info.chunk._Meta`, "", -1, "", -1) - assertParseName(`fish.chunk._blockinfo..tmp_1234567890123456789.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", -1) + assertParseName(`fish.chunk.003.chunk.004`, "fish.chunk.003", 2, "", "") + assertParseName(`fish.chunk.003.chunk._info`, "fish.chunk.003", -1, "info", "") + assertParseName(`fish.chunk.003.chunk._Meta`, "", -1, "", "") + + assertParseName(`fish.chunk._info.chunk.004`, "fish.chunk._info", 2, "", "") + 
assertParseName(`fish.chunk._info.chunk._info`, "fish.chunk._info", -1, "info", "") + assertParseName(`fish.chunk._info.chunk._info.chunk._Meta`, "", -1, "", "") + + // base file name looking like a valid chunk name (old temporary suffix) + assertParseName(`fish.chunk.003.chunk.005..tmp_0000000022`, "fish.chunk.003", 3, "", "000m") + assertParseName(`fish.chunk.003.chunk._x..tmp_0000054321`, "", -1, "", "") + assertParseName(`fish.chunk._info.chunk.005..tmp_0000000023`, "fish.chunk._info", 3, "", "000n") + assertParseName(`fish.chunk._info.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", "") + + assertParseName(`fish.chunk.003.chunk._blkinfo..tmp_9994567890123`, "fish.chunk.003", -1, "blkinfo", "3jjfvo3wr") + assertParseName(`fish.chunk._info.chunk._blkinfo..tmp_9994567890123`, "fish.chunk._info", -1, "blkinfo", "3jjfvo3wr") + + assertParseName(`fish.chunk.004..tmp_0000000021.chunk.004`, "fish.chunk.004..tmp_0000000021", 2, "", "") + assertParseName(`fish.chunk.004..tmp_0000000021.chunk.005..tmp_0000000025`, "fish.chunk.004..tmp_0000000021", 3, "", "000p") + assertParseName(`fish.chunk.004..tmp_0000000021.chunk._info`, "fish.chunk.004..tmp_0000000021", -1, "info", "") + assertParseName(`fish.chunk.004..tmp_0000000021.chunk._blkinfo..tmp_9994567890123`, "fish.chunk.004..tmp_0000000021", -1, "blkinfo", "3jjfvo3wr") + assertParseName(`fish.chunk.004..tmp_0000000021.chunk._Meta`, "", -1, "", "") + assertParseName(`fish.chunk.004..tmp_0000000021.chunk._x..tmp_0000054321`, "", -1, "", "") + + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk.004`, "fish.chunk._blkinfo..tmp_9994567890123", 2, "", "") + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk.005..tmp_0000000026`, "fish.chunk._blkinfo..tmp_9994567890123", 3, "", "000q") + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._info`, "fish.chunk._blkinfo..tmp_9994567890123", -1, "info", "") + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._blkinfo..tmp_9994567890123`, "fish.chunk._blkinfo..tmp_9994567890123", -1, "blkinfo", "3jjfvo3wr") + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._info.chunk._Meta`, "", -1, "", "") + assertParseName(`fish.chunk._blkinfo..tmp_9994567890123.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", "") + + assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk.004`, "fish.chunk._blkinfo..tmp_1234567890123456789", 2, "", "") + assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk.005..tmp_0000000022`, "fish.chunk._blkinfo..tmp_1234567890123456789", 3, "", "000m") + assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._info`, "fish.chunk._blkinfo..tmp_1234567890123456789", -1, "info", "") + assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._blkinfo..tmp_9994567890123`, "fish.chunk._blkinfo..tmp_1234567890123456789", -1, "blkinfo", "3jjfvo3wr") + assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._info.chunk._Meta`, "", -1, "", "") + assertParseName(`fish.chunk._blkinfo..tmp_1234567890123456789.chunk._info.chunk._x..tmp_0000054321`, "", -1, "", "") // attempts to make invalid chunk names - assertMakeNamePanics("fish", -1, "", -1) // neither data nor control - assertMakeNamePanics("fish", 0, "info", -1) // both data and control - assertMakeNamePanics("fish", -1, "futuredata", -1) // control type too long - assertMakeNamePanics("fish", -1, "123", -1) // digits not allowed - assertMakeNamePanics("fish", -1, "Meta", -1) // only lower case letters allowed - 
assertMakeNamePanics("fish", -1, "in-fo", -1) // punctuation not allowed - assertMakeNamePanics("fish", -1, "_info", -1) - assertMakeNamePanics("fish", -1, "info_", -1) - assertMakeNamePanics("fish", -2, ".bind", -3) - assertMakeNamePanics("fish", -2, "bind.", -3) - - assertMakeNamePanics("fish", -1, "", 1) // neither data nor control - assertMakeNamePanics("fish", 0, "info", 12) // both data and control - assertMakeNamePanics("fish", -1, "futuredata", 45) // control type too long - assertMakeNamePanics("fish", -1, "123", 123) // digits not allowed - assertMakeNamePanics("fish", -1, "Meta", 456) // only lower case letters allowed - assertMakeNamePanics("fish", -1, "in-fo", 321) // punctuation not allowed - assertMakeNamePanics("fish", -1, "_info", 15678) - assertMakeNamePanics("fish", -1, "info_", 999) - assertMakeNamePanics("fish", -2, ".bind", 0) - assertMakeNamePanics("fish", -2, "bind.", 0) + assertMakeNamePanics("fish", -1, "", "") // neither data nor control + assertMakeNamePanics("fish", 0, "info", "") // both data and control + assertMakeNamePanics("fish", -1, "metadata", "") // control type too long + assertMakeNamePanics("fish", -1, "blockinfo", "") // control type way too long + assertMakeNamePanics("fish", -1, "2xy", "") // first digit not allowed + assertMakeNamePanics("fish", -1, "123", "") // all digits not allowed + assertMakeNamePanics("fish", -1, "Meta", "") // only lower case letters allowed + assertMakeNamePanics("fish", -1, "in-fo", "") // punctuation not allowed + assertMakeNamePanics("fish", -1, "_info", "") + assertMakeNamePanics("fish", -1, "info_", "") + assertMakeNamePanics("fish", -2, ".bind", "") + assertMakeNamePanics("fish", -2, "bind.", "") + + assertMakeNamePanics("fish", -1, "", "1") // neither data nor control + assertMakeNamePanics("fish", 0, "info", "23") // both data and control + assertMakeNamePanics("fish", -1, "metadata", "45") // control type too long + assertMakeNamePanics("fish", -1, "blockinfo", "7") // control type way too long + assertMakeNamePanics("fish", -1, "2xy", "abc") // first digit not allowed + assertMakeNamePanics("fish", -1, "123", "def") // all digits not allowed + assertMakeNamePanics("fish", -1, "Meta", "mnk") // only lower case letters allowed + assertMakeNamePanics("fish", -1, "in-fo", "xyz") // punctuation not allowed + assertMakeNamePanics("fish", -1, "_info", "5678") + assertMakeNamePanics("fish", -1, "info_", "999") + assertMakeNamePanics("fish", -2, ".bind", "0") + assertMakeNamePanics("fish", -2, "bind.", "0") + + assertMakeNamePanics("fish", 0, "", "1234567890") // temporary suffix too long + assertMakeNamePanics("fish", 0, "", "123F4") // uppercase not allowed + assertMakeNamePanics("fish", 0, "", "123.") // punctuation not allowed + assertMakeNamePanics("fish", 0, "", "_123") } func testSmallFileInternals(t *testing.T, f *Fs) { @@ -383,7 +469,7 @@ func testPreventCorruption(t *testing.T, f *Fs) { billyObj := newFile("billy") billyChunkName := func(chunkNo int) string { - return f.makeChunkName(billyObj.Remote(), chunkNo, "", -1) + return f.makeChunkName(billyObj.Remote(), chunkNo, "", "") } err := f.Mkdir(ctx, billyChunkName(1)) @@ -433,7 +519,7 @@ func testPreventCorruption(t *testing.T, f *Fs) { // recreate billy in case it was anyhow corrupted willyObj := newFile("willy") - willyChunkName := f.makeChunkName(willyObj.Remote(), 1, "", -1) + willyChunkName := f.makeChunkName(willyObj.Remote(), 1, "", "") f.opt.FailHard = false willyChunk, err := f.NewObject(ctx, willyChunkName) f.opt.FailHard = true @@ -484,7 +570,7 @@ 
func testChunkNumberOverflow(t *testing.T, f *Fs) { f.opt.FailHard = false file, fileName := newFile(f, "wreaker") - wreak, _ := newFile(f.base, f.makeChunkName("wreaker", wreakNumber, "", -1)) + wreak, _ := newFile(f.base, f.makeChunkName("wreaker", wreakNumber, "", "")) f.opt.FailHard = false fstest.CheckListingWithRoot(t, f, dir, nil, nil, f.Precision()) @@ -532,7 +618,7 @@ func testMetadataInput(t *testing.T, f *Fs) { filename := path.Join(dir, name) require.True(t, len(contents) > 2 && len(contents) < minChunkForTest, description+" test data is correct") - part := putFile(f.base, f.makeChunkName(filename, 0, "", -1), "oops", "", true) + part := putFile(f.base, f.makeChunkName(filename, 0, "", ""), "oops", "", true) _ = putFile(f, filename, contents, "upload "+description, false) obj, err := f.NewObject(ctx, filename) diff --git a/docs/content/chunker.md b/docs/content/chunker.md index 8f362acfc09e3..80cb8e4157571 100644 --- a/docs/content/chunker.md +++ b/docs/content/chunker.md @@ -130,10 +130,10 @@ error message in such cases. #### Chunk names -The default chunk name format is `*.rclone-chunk.###`, hence by default -chunk names are `BIG_FILE_NAME.rclone-chunk.001`, -`BIG_FILE_NAME.rclone-chunk.002` etc. You can configure a different name -format using the `--chunker-name-format` option. The format uses asterisk +The default chunk name format is `*.rclone_chunk.###`, hence by default +chunk names are `BIG_FILE_NAME.rclone_chunk.001`, +`BIG_FILE_NAME.rclone_chunk.002` etc. You can configure another name format +using the `name_format` configuration file option. The format uses asterisk `*` as a placeholder for the base file name and one or more consecutive hash characters `#` as a placeholder for sequential chunk number. There must be one and only one asterisk. The number of consecutive hash @@ -211,6 +211,9 @@ file hashing, configure chunker with `md5all` or `sha1all`. These two modes guarantee given hash for all files. If wrapped remote doesn't support it, chunker will then add metadata to all files, even small. However, this can double the amount of small files in storage and incur additional service charges. +You can even use chunker to force md5/sha1 support in any other remote +at expence of sidecar meta objects by setting eg. `chunk_type=sha1all` +to force hashsums and `chunk_size=1P` to effectively disable chunking. Normally, when a file is copied to chunker controlled remote, chunker will ask the file source for compatible file hash and revert to on-the-fly @@ -274,6 +277,14 @@ Chunker requires wrapped remote to support server side `move` (or `copy` + This is because it internally renames temporary chunk files to their final names when an operation completes successfully. +Chunker encodes chunk number in file name, so with default `name_format` +setting it adds 17 characters. Also chunker adds 7 characters of temporary +suffix during operations. Many file systems limit base file name without path +by 255 characters. Using rclone's crypt remote as a base file system limits +file name by 143 characters. Thus, maximum name length is 231 for most files +and 119 for chunker-over-crypt. A user in need can change name format to +eg. `*.rcc##` and save 10 characters (provided at most 99 chunks per file). + Note that a move implemented using the copy-and-delete method may incur double charging with some cloud storage providers. 
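To make the naming scheme described in the chunker patch concrete, here is a small standalone sketch, not the backend's own code, of how a 6-character base-36 transaction identifier can be built from the time interval plus two random digits, and how an obsolete decimal suffix maps onto the same form. The sample file name and the sample decimal suffix are illustrative only.

package main

import (
    "fmt"
    "math/rand"
    "strconv"
    "time"
)

const closestPrimeZzzzSeconds = 1679609 // prime just under base-36 "zzzz"

// newXactID follows the scheme above: 4 time-derived base-36 characters
// plus 2 random base-36 characters in the range 0..1295.
func newXactID(rnd *rand.Rand) string {
    circleSec := time.Now().Unix() % closestPrimeZzzzSeconds
    first4 := strconv.FormatInt(circleSec, 36)
    last2 := strconv.FormatInt(rnd.Int63n(1296), 36)
    return fmt.Sprintf("%04s%02s", first4, last2)
}

// oldSuffixToXactID converts an old-style decimal transaction number,
// for example "0000000047", into the new base-36 identifier "001b".
func oldSuffixToXactID(decimal string) (string, error) {
    n, err := strconv.ParseInt(decimal, 10, 64)
    if err != nil || n < 0 {
        return "", fmt.Errorf("invalid old-style transaction number %q", decimal)
    }
    return fmt.Sprintf("%04s", strconv.FormatInt(n, 36)), nil
}

func main() {
    rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
    xactID := newXactID(rnd) // 6 characters, so the suffix "_"+xactID is 7
    fmt.Println("temporary chunk:", "fish.chunk.003_"+xactID)

    converted, _ := oldSuffixToXactID("0000000047")
    fmt.Println("old suffix 0000000047 ->", converted) // prints 001b
}

The probing step of the real newXactID, which checks whether chunk 0 with the candidate suffix already exists before accepting it, is omitted here for brevity.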
From a186284b23c8125e76180dc43953ed17ad8d3d47 Mon Sep 17 00:00:00 2001 From: buengese Date: Thu, 5 Dec 2019 13:32:42 +0100 Subject: [PATCH 30/47] asyncreader: fix EOF error --- fs/asyncreader/asyncreader.go | 7 +++++++ fs/asyncreader/asyncreader_test.go | 6 +++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/asyncreader/asyncreader.go b/fs/asyncreader/asyncreader.go index b4feb31495895..c385aace514d8 100644 --- a/fs/asyncreader/asyncreader.go +++ b/fs/asyncreader/asyncreader.go @@ -174,6 +174,9 @@ func (a *AsyncReader) WriteTo(w io.Writer) (n int64, err error) { n = 0 for { err = a.fill() + if err == io.EOF { + return n, nil + } if err != nil { return n, err } @@ -183,6 +186,10 @@ func (a *AsyncReader) WriteTo(w io.Writer) (n int64, err error) { if err != nil { return n, err } + if a.cur.err == io.EOF { + a.err = a.cur.err + return n, err + } if a.cur.err != nil { a.err = a.cur.err return n, a.cur.err diff --git a/fs/asyncreader/asyncreader_test.go b/fs/asyncreader/asyncreader_test.go index 2b4e8aaba86fa..8419cba2beab8 100644 --- a/fs/asyncreader/asyncreader_test.go +++ b/fs/asyncreader/asyncreader_test.go @@ -60,12 +60,12 @@ func TestAsyncWriteTo(t *testing.T) { var dst = &bytes.Buffer{} n, err := io.Copy(dst, ar) - assert.Equal(t, io.EOF, err) + require.NoError(t, err) assert.Equal(t, int64(10), n) - // Should still return EOF + // Should still not return any errors n, err = io.Copy(dst, ar) - assert.Equal(t, io.EOF, err) + require.NoError(t, err) assert.Equal(t, int64(0), n) err = ar.Close() From 241921c786cddffef606f980add71f581d2e5914 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 9 Dec 2019 14:25:54 +0000 Subject: [PATCH 31/47] vfs: don't cache the path in RW file objects to fix renaming --- vfs/file.go | 7 ++++++- vfs/read_write.go | 47 ++++++++++++++++++++++------------------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/vfs/file.go b/vfs/file.go index ab6dbee4009af..8a6e1fe497ff7 100644 --- a/vfs/file.go +++ b/vfs/file.go @@ -95,6 +95,11 @@ func (f *File) Path() string { return path.Join(f.d.path, f.leaf) } +// osPath returns the full path of the file in the cache in OS format +func (f *File) osPath() string { + return f.d.vfs.cache.toOSPath(f.Path()) +} + // Sys returns underlying data source (can be nil) - satisfies Node interface func (f *File) Sys() interface{} { return nil @@ -473,7 +478,7 @@ func (f *File) openRW(flags int) (fh *RWFileHandle, err error) { } // fs.Debugf(o, "File.openRW") - fh, err = newRWFileHandle(d, f, f.Path(), flags) + fh, err = newRWFileHandle(d, f, flags) if err != nil { fs.Errorf(f, "File.openRW failed: %v", err) return nil, err diff --git a/vfs/read_write.go b/vfs/read_write.go index 58742e9d9f345..05fcf8a8826a6 100644 --- a/vfs/read_write.go +++ b/vfs/read_write.go @@ -24,14 +24,12 @@ type RWFileHandle struct { *os.File mu sync.Mutex closed bool // set if handle has been closed - remote string file *File d *Dir opened bool - flags int // open flags - osPath string // path to the file in the cache - writeCalled bool // if any Write() methods have been called - changed bool // file contents was changed in any other way + flags int // open flags + writeCalled bool // if any Write() methods have been called + changed bool // file contents was changed in any other way } // Check interfaces @@ -44,26 +42,25 @@ var ( _ io.Closer = (*RWFileHandle)(nil) ) -func newRWFileHandle(d *Dir, f *File, remote string, flags int) (fh *RWFileHandle, err error) { +func newRWFileHandle(d *Dir, f *File, flags int) 
(fh *RWFileHandle, err error) { // if O_CREATE and O_EXCL are set and if path already exists, then return EEXIST if flags&(os.O_CREATE|os.O_EXCL) == os.O_CREATE|os.O_EXCL && f.exists() { return nil, EEXIST } fh = &RWFileHandle{ - file: f, - d: d, - remote: remote, - flags: flags, + file: f, + d: d, + flags: flags, } // mark the file as open in the cache - must be done before the mkdir - fh.d.vfs.cache.open(fh.remote) + fh.d.vfs.cache.open(fh.file.Path()) // Make a place for the file - fh.osPath, err = d.vfs.cache.mkdir(remote) + _, err = d.vfs.cache.mkdir(fh.file.Path()) if err != nil { - fh.d.vfs.cache.close(fh.remote) + fh.d.vfs.cache.close(fh.file.Path()) return nil, errors.Wrap(err, "open RW handle failed to make cache directory") } @@ -113,9 +110,9 @@ func (fh *RWFileHandle) openPending(truncate bool) (err error) { // If the remote object exists AND its cached file exists locally AND there are no // other RW handles with it open, then attempt to update it. if o != nil && fh.file.rwOpens() == 0 { - cacheObj, err := fh.d.vfs.cache.f.NewObject(context.TODO(), fh.remote) + cacheObj, err := fh.d.vfs.cache.f.NewObject(context.TODO(), fh.file.Path()) if err == nil && cacheObj != nil { - _, err = copyObj(fh.d.vfs.cache.f, cacheObj, fh.remote, o) + _, err = copyObj(fh.d.vfs.cache.f, cacheObj, fh.file.Path(), o) if err != nil { return errors.Wrap(err, "open RW handle failed to update cached file") } @@ -123,12 +120,12 @@ func (fh *RWFileHandle) openPending(truncate bool) (err error) { } // try to open a exising cache file - fd, err = file.OpenFile(fh.osPath, cacheFileOpenFlags&^os.O_CREATE, 0600) + fd, err = file.OpenFile(fh.file.osPath(), cacheFileOpenFlags&^os.O_CREATE, 0600) if os.IsNotExist(err) { // cache file does not exist, so need to fetch it if we have an object to fetch // it from if o != nil { - _, err = copyObj(fh.d.vfs.cache.f, nil, fh.remote, o) + _, err = copyObj(fh.d.vfs.cache.f, nil, fh.file.Path(), o) if err != nil { cause := errors.Cause(err) if cause != fs.ErrorObjectNotFound && cause != fs.ErrorDirNotFound { @@ -162,7 +159,7 @@ func (fh *RWFileHandle) openPending(truncate bool) (err error) { fh.changed = true if fh.flags&os.O_CREATE == 0 && fh.file.exists() { // create an empty file if it exists on the source - err = ioutil.WriteFile(fh.osPath, []byte{}, 0600) + err = ioutil.WriteFile(fh.file.osPath(), []byte{}, 0600) if err != nil { return errors.Wrap(err, "cache open failed to create zero length file") } @@ -172,9 +169,9 @@ func (fh *RWFileHandle) openPending(truncate bool) (err error) { // exists in these cases. 
if runtime.GOOS == "windows" && fh.flags&os.O_APPEND != 0 { cacheFileOpenFlags &^= os.O_TRUNC - _, err = os.Stat(fh.osPath) + _, err = os.Stat(fh.file.osPath()) if err == nil { - err = os.Truncate(fh.osPath, 0) + err = os.Truncate(fh.file.osPath(), 0) if err != nil { return errors.Wrap(err, "cache open failed to truncate") } } } if fd == nil { fs.Debugf(fh.logPrefix(), "Opening cached copy with flags=%s", decodeOpenFlags(fh.flags)) - fd, err = file.OpenFile(fh.osPath, cacheFileOpenFlags, 0600) + fd, err = file.OpenFile(fh.file.osPath(), cacheFileOpenFlags, 0600) if err != nil { return errors.Wrap(err, "cache open file failed") } @@ -280,14 +277,14 @@ func (fh *RWFileHandle) flushWrites(closeFile bool) error { if isCopied { // Transfer the temp file to the remote - cacheObj, err := fh.d.vfs.cache.f.NewObject(context.TODO(), fh.remote) + cacheObj, err := fh.d.vfs.cache.f.NewObject(context.TODO(), fh.file.Path()) if err != nil { err = errors.Wrap(err, "failed to find cache file") fs.Errorf(fh.logPrefix(), "%v", err) return err } - o, err := copyObj(fh.d.vfs.f, fh.file.getObject(), fh.remote, cacheObj) + o, err := copyObj(fh.d.vfs.f, fh.file.getObject(), fh.file.Path(), cacheObj) if err != nil { err = errors.Wrap(err, "failed to transfer file from cache to remote") fs.Errorf(fh.logPrefix(), "%v", err) @@ -320,7 +317,7 @@ func (fh *RWFileHandle) close() (err error) { if fh.opened { fh.file.delRWOpen() } - fh.d.vfs.cache.close(fh.remote) + fh.d.vfs.cache.close(fh.file.Path()) }() return fh.flushWrites(true) @@ -549,5 +546,5 @@ func (fh *RWFileHandle) Sync() error { } func (fh *RWFileHandle) logPrefix() string { - return fmt.Sprintf("%s(%p)", fh.remote, fh) + return fmt.Sprintf("%s(%p)", fh.file.Path(), fh) } From 6e683b4359c30365706ff106a63ff07ddfc51449 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 5 Dec 2019 12:58:24 +0000 Subject: [PATCH 32/47] vfs: fix rename of open files when using the VFS cache Before this change, renaming an open file when using the VFS cache was delayed until the file was closed. This meant that the file was not readable after a rename even though it was in the cache. After this change we rename the local cache file and the in memory cache, delaying only the rename of the file in object storage.
See: https://forum.rclone.org/t/xen-orchestra-ebadf-bad-file-descriptor-write/13104 --- vfs/file.go | 54 ++++++++++++------ vfs/file_test.go | 122 ++++++++++++++++++++++++++++++++++++----- vfs/read_write_test.go | 2 +- 3 files changed, 146 insertions(+), 32 deletions(-) diff --git a/vfs/file.go b/vfs/file.go index 8a6e1fe497ff7..bc213d537d1eb 100644 --- a/vfs/file.go +++ b/vfs/file.go @@ -137,29 +137,42 @@ func (f *File) applyPendingRename() { func (f *File) rename(ctx context.Context, destDir *Dir, newName string) error { f.mu.RLock() d := f.d + o := f.o + oldPendingRenameFun := f.pendingRenameFun f.mu.RUnlock() + if features := d.f.Features(); features.Move == nil && features.Copy == nil { err := errors.Errorf("Fs %q can't rename files (no server side Move or Copy)", d.f) fs.Errorf(f.Path(), "Dir.Rename error: %v", err) return err } + newPath := path.Join(destDir.path, newName) + renameCall := func(ctx context.Context) error { - newPath := path.Join(destDir.path, newName) + f.mu.RLock() + o := f.o + f.mu.RUnlock() + if o == nil { + return errors.New("Cannot rename: file object is not available") + } + + // chain rename calls if any + if oldPendingRenameFun != nil { + err := oldPendingRenameFun(ctx) + if err != nil { + return err + } + } + + // do the move of the remote object dstOverwritten, _ := d.f.NewObject(ctx, newPath) - newObject, err := operations.Move(ctx, d.f, dstOverwritten, newPath, f.o) + newObject, err := operations.Move(ctx, d.f, dstOverwritten, newPath, o) if err != nil { fs.Errorf(f.Path(), "File.Rename error: %v", err) return err } - // Rename in the cache too if it exists - if f.d.vfs.Opt.CacheMode >= CacheModeWrites && f.d.vfs.cache.exists(f.Path()) { - if err := f.d.vfs.cache.rename(f.Path(), newPath); err != nil { - fs.Infof(f.Path(), "File.Rename failed in Cache: %v", err) - } - } - // newObject can be nil here for example if --dry-run if newObject == nil { err = errors.New("rename failed: nil object returned") @@ -167,25 +180,32 @@ func (f *File) rename(ctx context.Context, destDir *Dir, newName string) error { return err } // Update the node with the new details - fs.Debugf(f.o, "Updating file with %v %p", newObject, f) + fs.Debugf(o, "Updating file with %v %p", newObject, f) // f.rename(destDir, newObject) f.mu.Lock() f.o = newObject - f.d = destDir - f.leaf = path.Base(newObject.Remote()) f.pendingRenameFun = nil f.mu.Unlock() return nil } - f.mu.RLock() + // Rename in the cache if it exists + if f.d.vfs.Opt.CacheMode != CacheModeOff && f.d.vfs.cache.exists(f.Path()) { + if err := f.d.vfs.cache.rename(f.Path(), newPath); err != nil { + fs.Infof(f.Path(), "File.Rename failed in Cache: %v", err) + } + } + + // rename the file object + f.mu.Lock() + f.d = destDir + f.leaf = newName writing := f._writingInProgress() - f.mu.RUnlock() + f.mu.Unlock() + if writing { - fs.Debugf(f.o, "File is currently open, delaying rename %p", f) + fs.Debugf(o, "File is currently open, delaying rename %p", f) f.mu.Lock() - f.d = destDir - f.leaf = newName f.pendingRenameFun = renameCall f.mu.Unlock() return nil diff --git a/vfs/file_test.go b/vfs/file_test.go index 70848ef63fd60..cf1c1f1954661 100644 --- a/vfs/file_test.go +++ b/vfs/file_test.go @@ -6,6 +6,7 @@ import ( "os" "testing" + "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fstest" "github.com/rclone/rclone/fstest/mockfs" "github.com/rclone/rclone/fstest/mockobject" @@ -13,8 +14,10 @@ import ( "github.com/stretchr/testify/require" ) -func fileCreate(t *testing.T, r *fstest.Run) (*VFS, *File, fstest.Item) { - vfs := 
New(r.Fremote, nil) +func fileCreate(t *testing.T, r *fstest.Run, mode CacheMode) (*VFS, *File, fstest.Item) { + opt := DefaultOpt + opt.CacheMode = mode + vfs := New(r.Fremote, &opt) file1 := r.WriteObject(context.Background(), "dir/file1", "file1 contents", t1) fstest.CheckItems(t, r.Fremote, file1) @@ -29,7 +32,7 @@ func fileCreate(t *testing.T, r *fstest.Run) (*VFS, *File, fstest.Item) { func TestFileMethods(t *testing.T) { r := fstest.NewRun(t) defer r.Finalise() - vfs, file, _ := fileCreate(t, r) + vfs, file, _ := fileCreate(t, r, CacheModeOff) // String assert.Equal(t, "dir/file1", file.String()) @@ -84,7 +87,7 @@ func TestFileSetModTime(t *testing.T) { return } defer r.Finalise() - vfs, file, file1 := fileCreate(t, r) + vfs, file, file1 := fileCreate(t, r, CacheModeOff) err := file.SetModTime(t2) require.NoError(t, err) @@ -97,12 +100,8 @@ func TestFileSetModTime(t *testing.T) { assert.Equal(t, EROFS, err) } -func TestFileOpenRead(t *testing.T) { - r := fstest.NewRun(t) - defer r.Finalise() - _, file, _ := fileCreate(t, r) - - fd, err := file.openRead() +func fileCheckContents(t *testing.T, file *File) { + fd, err := file.Open(os.O_RDONLY) require.NoError(t, err) contents, err := ioutil.ReadAll(fd) @@ -112,6 +111,14 @@ func TestFileOpenRead(t *testing.T) { require.NoError(t, fd.Close()) } +func TestFileOpenRead(t *testing.T) { + r := fstest.NewRun(t) + defer r.Finalise() + _, file, _ := fileCreate(t, r, CacheModeOff) + + fileCheckContents(t, file) +} + func TestFileOpenReadUnknownSize(t *testing.T) { var ( contents = []byte("file contents") @@ -160,7 +167,7 @@ func TestFileOpenReadUnknownSize(t *testing.T) { func TestFileOpenWrite(t *testing.T) { r := fstest.NewRun(t) defer r.Finalise() - vfs, file, _ := fileCreate(t, r) + vfs, file, _ := fileCreate(t, r, CacheModeOff) fd, err := file.openWrite(os.O_WRONLY | os.O_TRUNC) require.NoError(t, err) @@ -181,7 +188,7 @@ func TestFileOpenWrite(t *testing.T) { func TestFileRemove(t *testing.T) { r := fstest.NewRun(t) defer r.Finalise() - vfs, file, _ := fileCreate(t, r) + vfs, file, _ := fileCreate(t, r, CacheModeOff) err := file.Remove() require.NoError(t, err) @@ -196,7 +203,7 @@ func TestFileRemove(t *testing.T) { func TestFileRemoveAll(t *testing.T) { r := fstest.NewRun(t) defer r.Finalise() - vfs, file, _ := fileCreate(t, r) + vfs, file, _ := fileCreate(t, r, CacheModeOff) err := file.RemoveAll() require.NoError(t, err) @@ -211,7 +218,7 @@ func TestFileRemoveAll(t *testing.T) { func TestFileOpen(t *testing.T) { r := fstest.NewRun(t) defer r.Finalise() - _, file, _ := fileCreate(t, r) + _, file, _ := fileCreate(t, r, CacheModeOff) fd, err := file.Open(os.O_RDONLY) require.NoError(t, err) @@ -233,3 +240,90 @@ func TestFileOpen(t *testing.T) { fd, err = file.Open(3) assert.Equal(t, EPERM, err) } + +func testFileRename(t *testing.T, mode CacheMode) { + r := fstest.NewRun(t) + defer r.Finalise() + vfs, file, item := fileCreate(t, r, mode) + + rootDir, err := vfs.Root() + require.NoError(t, err) + + // check file in cache + if mode != CacheModeOff { + // read contents to get file in cache + fileCheckContents(t, file) + assert.True(t, vfs.cache.exists(item.Path)) + } + + dir := file.Dir() + + // start with "dir/file1" + fstest.CheckItems(t, r.Fremote, item) + + // rename file to "newLeaf" + err = dir.Rename("file1", "newLeaf", rootDir) + require.NoError(t, err) + + item.Path = "newLeaf" + fstest.CheckItems(t, r.Fremote, item) + + // check file in cache + if mode != CacheModeOff { + assert.True(t, vfs.cache.exists(item.Path)) + } + + // check 
file exists in the vfs layer at its new name + _, err = vfs.Stat("newLeaf") + require.NoError(t, err) + + // rename it back to "dir/file1" + err = rootDir.Rename("newLeaf", "file1", dir) + require.NoError(t, err) + + item.Path = "dir/file1" + fstest.CheckItems(t, r.Fremote, item) + + // check file in cache + if mode != CacheModeOff { + assert.True(t, vfs.cache.exists(item.Path)) + } + + // now try renaming it with the file open + // first open it and write to it but dont close it + fd, err := file.Open(os.O_WRONLY | os.O_TRUNC) + require.NoError(t, err) + newContents := []byte("this is some new contents") + _, err = fd.Write(newContents) + require.NoError(t, err) + + // rename file to "newLeaf" + err = dir.Rename("file1", "newLeaf", rootDir) + require.NoError(t, err) + newItem := fstest.NewItem("newLeaf", string(newContents), item.ModTime) + + // check file has been renamed immediately in the cache + if mode != CacheModeOff { + assert.True(t, vfs.cache.exists("newLeaf")) + } + + // check file exists in the vfs layer at its new name + _, err = vfs.Stat("newLeaf") + require.NoError(t, err) + + // Close the file + require.NoError(t, fd.Close()) + + // Check file has now been renamed on the remote + item.Path = "newLeaf" + fstest.CheckListingWithPrecision(t, r.Fremote, []fstest.Item{newItem}, nil, fs.ModTimeNotSupported) +} + +func TestFileRename(t *testing.T) { + t.Run("CacheModeOff", func(t *testing.T) { + testFileRename(t, CacheModeOff) + }) + t.Run("CacheModeFull", func(t *testing.T) { + testFileRename(t, CacheModeFull) + }) +} diff --git a/vfs/read_write_test.go b/vfs/read_write_test.go index 948d8b08284cc..403e61b33b3d3 100644 --- a/vfs/read_write_test.go +++ b/vfs/read_write_test.go @@ -698,7 +698,7 @@ func TestRWFileModTimeWithOpenWriters(t *testing.T) { } } -func TestCacheRename(t *testing.T) { +func TestRWCacheRename(t *testing.T) { r := fstest.NewRun(t) defer r.Finalise() From 1ab49850468a00b7e3131f985f36d90831091fe4 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 9 Dec 2019 15:23:08 +0000 Subject: [PATCH 33/47] vfs: when renaming files in the cache, rename the cache item in memory too --- vfs/cache.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vfs/cache.go b/vfs/cache.go index 41ace30050647..8a9ade07ab345 100644 --- a/vfs/cache.go +++ b/vfs/cache.go @@ -309,6 +309,13 @@ func (c *cache) rename(name string, newName string) (err error) { if err = os.Rename(osOldPath, osNewPath); err != nil { return errors.Wrapf(err, "Failed to rename in cache: %s to %s", osOldPath, osNewPath) } + // Rename the cache item + c.itemMu.Lock() + if oldItem, ok := c.item[name]; ok { + c.item[newName] = oldItem + delete(c.item, name) + } + c.itemMu.Unlock() fs.Infof(name, "Renamed in cache") return nil } From 0ecb8bc2f97a2c3715b41bf7188d97dca5d2a3b5 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Wed, 11 Dec 2019 17:23:52 +0000 Subject: [PATCH 34/47] s3: fix url decoding of NextMarker - fixes #3799 Before this patch we were failing to URL decode the NextMarker when url encoding was used for the listing. The result of this was duplicated listings entries for directories with >1000 entries where the NextMarker was a file containing a space. 
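The essence of the fix is simply to pass the returned marker through `net/url` unescaping before reusing it in the next listing call. A minimal standalone illustration (the key below is made up):

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	// With EncodingType=url the keys in the listing response, including
	// NextMarker, come back percent/plus encoded.
	encoded := "backups/file+with+space.bin" // hypothetical NextMarker value

	decoded, err := url.QueryUnescape(encoded)
	if err != nil {
		fmt.Println("failed to URL decode NextMarker:", err)
		return
	}
	fmt.Println(decoded) // backups/file with space.bin
}
```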
--- backend/s3/s3.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/backend/s3/s3.go b/backend/s3/s3.go index 5fe8cce298f5a..d11c0ea13736b 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -1398,6 +1398,12 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck } else { marker = resp.NextMarker } + if urlEncodeListings { + *marker, err = url.QueryUnescape(*marker) + if err != nil { + return errors.Wrapf(err, "failed to URL decode NextMarker %q", *marker) + } + } } return nil } From 59c75ba442f3e1ac8627d750dcba32859b3066a9 Mon Sep 17 00:00:00 2001 From: Cnly Date: Fri, 20 Dec 2019 14:37:21 +0800 Subject: [PATCH 35/47] accounting: fix error count shown as checks - fixes #3814 --- fs/accounting/stats.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/accounting/stats.go b/fs/accounting/stats.go index a2e44fb604470..9f75026103efc 100644 --- a/fs/accounting/stats.go +++ b/fs/accounting/stats.go @@ -320,7 +320,7 @@ func (s *StatsInfo) String() string { } if s.checks != 0 || totalChecks != 0 { _, _ = fmt.Fprintf(buf, "Checks: %10d / %d, %s\n", - s.errors, totalChecks, percent(s.checks, totalChecks)) + s.checks, totalChecks, percent(s.checks, totalChecks)) } if s.deletes != 0 { _, _ = fmt.Fprintf(buf, "Deleted: %10d\n", s.deletes) From 4daecd3158bfacbe89e7297396f94922bac84bbf Mon Sep 17 00:00:00 2001 From: Wei He Date: Sun, 22 Dec 2019 03:35:26 -0500 Subject: [PATCH 36/47] docs: fix in-page anchor navigation positioning --- docs/static/css/custom.css | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/static/css/custom.css b/docs/static/css/custom.css index af55c80728901..4f9a0573473e9 100644 --- a/docs/static/css/custom.css +++ b/docs/static/css/custom.css @@ -43,7 +43,11 @@ h3:hover .header-link, h4:hover .header-link, h5:hover .header-link, h6:hover .header-link { - opacity: 1; + opacity: 1; +} +h2, h3, h4, h5, h6 { + padding-top: 55px; + margin-top: -44px; } /* Fix spacing between menu items */ From f754d897e54c07de22256c518f0c975e747eb4ed Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Sat, 28 Dec 2019 13:29:08 +0000 Subject: [PATCH 37/47] Add Wei He to contributors --- docs/content/authors.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/content/authors.md b/docs/content/authors.md index 3729840c56a30..98114ecf8c83d 100644 --- a/docs/content/authors.md +++ b/docs/content/authors.md @@ -316,3 +316,4 @@ Contributors * anuar45 * Fernando * David Cole + * Wei He From 207474ababb22a1c2f85bf26c8f2e4dffd258c0e Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 17 Oct 2019 17:41:11 +0100 Subject: [PATCH 38/47] sync: add --no-check-dest flag - fixes #3616 --- docs/content/docs.md | 17 +++++++++++++++++ fs/config.go | 1 + fs/config/configflags/configflags.go | 1 + fs/march/march.go | 4 +++- fs/operations/operations.go | 13 ++++++++----- fs/sync/sync.go | 14 ++++++++++++++ 6 files changed, 44 insertions(+), 6 deletions(-) diff --git a/docs/content/docs.md b/docs/content/docs.md index 46a7955e39036..b7c1948016ed0 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -770,6 +770,23 @@ in effect (the defaults): - 500MB..750MB files will be downloaded with 3 streams - 750MB+ files will be downloaded with 4 streams +### --no-check-dest ### + +The `--no-check-dest` can be used with `move` or `copy` and it causes +rclone not to check the destination at all when copying files. 
+ +This means that: + +- the destination is not listed minimising the API calls +- files are always transferred +- this can cause duplicates on remotes which allow it (eg Google Drive) +- `--retries 1` is recommended otherwise you'll transfer everything again on a retry + +This flag is useful to minimise the transactions if you know that none +of the files are on the destination. + +This is a specialized flag which should be ignored by most users! + ### --no-gzip-encoding ### Don't set `Accept-Encoding: gzip`. This means that rclone won't ask diff --git a/fs/config.go b/fs/config.go index 0e7f30f7bbbb7..52df901ddf26a 100644 --- a/fs/config.go +++ b/fs/config.go @@ -67,6 +67,7 @@ type ConfigInfo struct { IgnoreChecksum bool IgnoreCaseSync bool NoTraverse bool + NoCheckDest bool NoUpdateModTime bool DataRateUnit string CompareDest string diff --git a/fs/config/configflags/configflags.go b/fs/config/configflags/configflags.go index 8aa868dd16e67..f515e0e3e5708 100644 --- a/fs/config/configflags/configflags.go +++ b/fs/config/configflags/configflags.go @@ -68,6 +68,7 @@ func AddFlags(flagSet *pflag.FlagSet) { flags.BoolVarP(flagSet, &fs.Config.IgnoreChecksum, "ignore-checksum", "", fs.Config.IgnoreChecksum, "Skip post copy check of checksums.") flags.BoolVarP(flagSet, &fs.Config.IgnoreCaseSync, "ignore-case-sync", "", fs.Config.IgnoreCaseSync, "Ignore case when synchronizing") flags.BoolVarP(flagSet, &fs.Config.NoTraverse, "no-traverse", "", fs.Config.NoTraverse, "Don't traverse destination file system on copy.") + flags.BoolVarP(flagSet, &fs.Config.NoCheckDest, "no-check-dest", "", fs.Config.NoCheckDest, "Don't check the destination, copy regardless.") flags.BoolVarP(flagSet, &fs.Config.NoUpdateModTime, "no-update-modtime", "", fs.Config.NoUpdateModTime, "Don't update destination mod-time if files identical.") flags.StringVarP(flagSet, &fs.Config.CompareDest, "compare-dest", "", fs.Config.CompareDest, "Include additional server-side path during comparison.") flags.StringVarP(flagSet, &fs.Config.CopyDest, "copy-dest", "", fs.Config.CopyDest, "Implies --compare-dest but also copies files from path into destination.") diff --git a/fs/march/march.go b/fs/march/march.go index d27337b863bff..cf50812b0f10f 100644 --- a/fs/march/march.go +++ b/fs/march/march.go @@ -30,6 +30,7 @@ type March struct { SrcIncludeAll bool // don't include all files in the src DstIncludeAll bool // don't include all files in the destination Callback Marcher // object to call with results + NoCheckDest bool // transfer all objects regardless without checking dst // internal state srcListDir listDirFn // function to call to list a directory in the src dstListDir listDirFn // function to call to list a directory in the dst @@ -188,6 +189,7 @@ func (m *March) Run() error { srcDepth: srcDepth - 1, dstRemote: m.Dir, dstDepth: dstDepth - 1, + noDst: m.NoCheckDest, } go func() { // when the context is cancelled discard the remaining jobs @@ -406,7 +408,7 @@ func (m *March) processJob(job listDirJob) ([]listDirJob, error) { // If NoTraverse is set, then try to find a matching object // for each item in the srcList - if m.NoTraverse { + if m.NoTraverse && !m.NoCheckDest { for _, src := range srcList { if srcObj, ok := src.(fs.Object); ok { leaf := path.Base(srcObj.Remote()) diff --git a/fs/operations/operations.go b/fs/operations/operations.go index 0df993486a57a..c90c673999467 100644 --- a/fs/operations/operations.go +++ b/fs/operations/operations.go @@ -1705,11 +1705,14 @@ func moveOrCopyFile(ctx context.Context, fdst fs.Fs, fsrc 
fs.Fs, dstFileName str } // Find dst object if it exists - dstObj, err := fdst.NewObject(ctx, dstFileName) - if err == fs.ErrorObjectNotFound { - dstObj = nil - } else if err != nil { - return err + var dstObj fs.Object + if !fs.Config.NoCheckDest { + dstObj, err = fdst.NewObject(ctx, dstFileName) + if err == fs.ErrorObjectNotFound { + dstObj = nil + } else if err != nil { + return err + } } // Special case for changing case of a file on a case insensitive remote diff --git a/fs/sync/sync.go b/fs/sync/sync.go index 850d63381ebdb..ca89533c1fe73 100644 --- a/fs/sync/sync.go +++ b/fs/sync/sync.go @@ -31,6 +31,7 @@ type syncCopyMove struct { ctx context.Context // internal context for controlling go-routines cancel func() // cancel the context noTraverse bool // if set don't traverse the dst + noCheckDest bool // if set transfer all objects regardless without checking dst deletersWg sync.WaitGroup // for delete before go routine deleteFilesCh chan fs.Object // channel to receive deletes if delete before trackRenames bool // set if we should do server side renames @@ -82,6 +83,7 @@ func newSyncCopyMove(ctx context.Context, fdst, fsrc fs.Fs, deleteMode fs.Delete dstEmptyDirs: make(map[string]fs.DirEntry), srcEmptyDirs: make(map[string]fs.DirEntry), noTraverse: fs.Config.NoTraverse, + noCheckDest: fs.Config.NoCheckDest, toBeChecked: newPipe(accounting.Stats(ctx).SetCheckQueue, fs.Config.MaxBacklog), toBeUploaded: newPipe(accounting.Stats(ctx).SetTransferQueue, fs.Config.MaxBacklog), deleteFilesCh: make(chan fs.Object, fs.Config.Checkers), @@ -95,6 +97,17 @@ func newSyncCopyMove(ctx context.Context, fdst, fsrc fs.Fs, deleteMode fs.Delete fs.Errorf(nil, "Ignoring --no-traverse with sync") s.noTraverse = false } + if s.noCheckDest { + if s.deleteMode != fs.DeleteModeOff { + return nil, errors.New("can't use --no-check-dest with sync: use copy instead") + } + if fs.Config.Immutable { + return nil, errors.New("can't use --no-check-dest with --immutable") + } + if s.backupDir != nil { + return nil, errors.New("can't use --no-check-dest with --backup-dir") + } + } if s.trackRenames { // Don't track renames for remotes without server-side move support. 
if !operations.CanServerSideMove(fdst) { @@ -667,6 +680,7 @@ func (s *syncCopyMove) run() error { NoTraverse: s.noTraverse, Callback: s, DstIncludeAll: filter.Active.Opt.DeleteExcluded, + NoCheckDest: s.noCheckDest, } s.processError(m.Run()) From db1c7f9ca8ca4eb9331b5c9449a6713e5b8df3b0 Mon Sep 17 00:00:00 2001 From: Outvi V <19144373+outloudvi@users.noreply.github.com> Date: Thu, 2 Jan 2020 11:10:48 +0000 Subject: [PATCH 39/47] s3: Add new region Asia Pacific (Hong Kong) --- backend/s3/s3.go | 6 ++++++ docs/content/s3.md | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/backend/s3/s3.go b/backend/s3/s3.go index d11c0ea13736b..04fdefe3a489a 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -160,6 +160,9 @@ func init() { }, { Value: "ap-south-1", Help: "Asia Pacific (Mumbai)\nNeeds location constraint ap-south-1.", + }, { + Value: "ap-east-1", + Help: "Asia Pacific (Hong Kong) Region\nNeeds location constraint ap-east-1.", }, { Value: "sa-east-1", Help: "South America (Sao Paulo) Region\nNeeds location constraint sa-east-1.", @@ -428,6 +431,9 @@ func init() { }, { Value: "ap-south-1", Help: "Asia Pacific (Mumbai)", + }, { + Value: "ap-east-1", + Help: "Asia Pacific (Hong Kong)", }, { Value: "sa-east-1", Help: "South America (Sao Paulo) Region.", diff --git a/docs/content/s3.md b/docs/content/s3.md index 7e5311d30ba82..d000c291cea1e 100644 --- a/docs/content/s3.md +++ b/docs/content/s3.md @@ -136,8 +136,11 @@ Choose a number from below, or type in your own value / Asia Pacific (Mumbai) 13 | Needs location constraint ap-south-1. \ "ap-south-1" + / Asia Pacific (Hong Kong) Region +14 | Needs location constraint ap-east-1. + \ "ap-east-1" / South America (Sao Paulo) Region -14 | Needs location constraint sa-east-1. +15 | Needs location constraint sa-east-1. \ "sa-east-1" region> 1 Endpoint for S3 API. @@ -171,7 +174,9 @@ Choose a number from below, or type in your own value \ "ap-northeast-2" 13 / Asia Pacific (Mumbai) \ "ap-south-1" -14 / South America (Sao Paulo) Region. +14 / Asia Pacific (Hong Kong) + \ "ap-east-1" +15 / South America (Sao Paulo) Region. \ "sa-east-1" location_constraint> 1 Canned ACL used when creating buckets and/or storing objects in S3. @@ -556,6 +561,9 @@ Region to connect to. - "ap-south-1" - Asia Pacific (Mumbai) - Needs location constraint ap-south-1. + - "ap-east-1" + - Asia Pacific (Hong Kong) Region + - Needs location constraint ap-east-1. - "sa-east-1" - South America (Sao Paulo) Region - Needs location constraint sa-east-1. @@ -775,6 +783,8 @@ Used when creating buckets only. - Asia Pacific (Seoul) - "ap-south-1" - Asia Pacific (Mumbai) + - "ap-east-1" + - Asia Pacific (Hong Kong) - "sa-east-1" - South America (Sao Paulo) Region.
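For reference, a remote using the new Hong Kong region could be configured roughly like this in `rclone.conf` (the remote name is arbitrary and the AWS provider is assumed):

```
[s3-hk]
type = s3
provider = AWS
region = ap-east-1
location_constraint = ap-east-1
```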
From 32a3ba9e3f85f2fc828c865889d2ca1e49c1028a Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Thu, 2 Jan 2020 11:52:43 +0000 Subject: [PATCH 40/47] Add Outvi V to contributors --- docs/content/authors.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/content/authors.md b/docs/content/authors.md index 98114ecf8c83d..5c229222da062 100644 --- a/docs/content/authors.md +++ b/docs/content/authors.md @@ -317,3 +317,4 @@ Contributors * Fernando * David Cole * Wei He + * Outvi V <19144373+outloudvi@users.noreply.github.com> From 584e705c0cdfec0afdef4e9a1d3681809521a61e Mon Sep 17 00:00:00 2001 From: Thomas Kriechbaumer Date: Thu, 26 Dec 2019 11:05:00 +0000 Subject: [PATCH 41/47] s3: introduce list_chunk option for bucket listing The S3 ListObject API returns paginated bucket listings, with "MaxKeys" items for each GET call. The default value is 1000 entries, but for buckets with millions of objects it might make sense to request more elements per request, if the backend supports it. This commit adds a "list_chunk" option for the user to specify a lower or higher value. This commit does not add safeguards around this value - if a user decides to request too large a list, it might result in connection timeouts (on the server or client). In AWS S3, there is a fixed limit of 1000; some other services might have one too. In Ceph, this can be configured in RadosGW. --- backend/s3/s3.go | 16 +++++++++++++--- docs/content/s3.md | 15 +++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/backend/s3/s3.go b/backend/s3/s3.go index 04fdefe3a489a..4b7b93a276e51 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -793,6 +793,17 @@ WARNING: Storing parts of an incomplete multipart upload counts towards space us `, Default: false, Advanced: true, + }, { + Name: "list_chunk", + Help: `Size of listing chunk (response list for each ListObject S3 request). + +This option is also known as "MaxKeys", "max-items", or "page-size" from the AWS S3 specification. +Most services truncate the response list to 1000 objects even if requested more than that. +In AWS S3 this is a global maximum and cannot be changed, see [AWS S3](https://docs.aws.amazon.com/cli/latest/reference/s3/ls.html). +In Ceph, this can be increased with the "rgw list buckets max chunk" option.
+`, + Default: 1000, + Advanced: true, }}, }) } @@ -801,7 +812,6 @@ WARNING: Storing parts of an incomplete multipart upload counts towards space us const ( metaMtime = "Mtime" // the meta key to store mtime in - eg X-Amz-Meta-Mtime metaMD5Hash = "Md5chksum" // the meta key to store md5hash in - listChunkSize = 1000 // number of items to read at once maxRetries = 10 // number of retries to make of operations maxSizeForCopy = 5 * 1024 * 1024 * 1024 // The maximum size of object we can COPY minChunkSize = fs.SizeSuffix(s3manager.MinUploadPartSize) @@ -834,6 +844,7 @@ type Options struct { V2Auth bool `config:"v2_auth"` UseAccelerateEndpoint bool `config:"use_accelerate_endpoint"` LeavePartsOnError bool `config:"leave_parts_on_error"` + ListChunk int64 `config:"list_chunk"` } // Fs represents a remote s3 server @@ -1260,7 +1271,6 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck if directory != "" { directory += "/" } - maxKeys := int64(listChunkSize) delimiter := "" if !recurse { delimiter = "/" @@ -1288,7 +1298,7 @@ func (f *Fs) list(ctx context.Context, bucket, directory, prefix string, addBuck Bucket: &bucket, Delimiter: &delimiter, Prefix: &directory, - MaxKeys: &maxKeys, + MaxKeys: &f.opt.ListChunk, Marker: marker, } if urlEncodeListings { diff --git a/docs/content/s3.md b/docs/content/s3.md index d000c291cea1e..7ffdbed8c95d3 100644 --- a/docs/content/s3.md +++ b/docs/content/s3.md @@ -1134,6 +1134,21 @@ WARNING: Storing parts of an incomplete multipart upload counts towards space us - Type: bool - Default: false +#### --s3-list-chunk + +Size of listing chunk (response list for each ListObject S3 request). + +This option is also known as "MaxKeys", "max-items", or "page-size" from the AWS S3 specification. +Most services truncate the response list to 1000 objects even if requested more than that. +In AWS S3 this is a global maximum and cannot be changed, see [AWS S3](https://docs.aws.amazon.com/cli/latest/reference/s3/ls.html). +In Ceph, this can be increased with the "rgw list buckets max chunk" option. 
+ + +- Config: list_chunk +- Env Var: RCLONE_S3_LIST_CHUNK +- Type: int +- Default: 1000 + ### Anonymous access to public buckets ### From 8a2d1dbe24fde2970aa8e643db9c36e8ace966c3 Mon Sep 17 00:00:00 2001 From: buengese Date: Sat, 28 Dec 2019 17:45:04 +0100 Subject: [PATCH 42/47] jottacloud: add support whitelabel versions --- backend/jottacloud/api/types.go | 31 +++++++++++++++++++++++ backend/jottacloud/jottacloud.go | 42 +++++++++++++++++++++----------- 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/backend/jottacloud/api/types.go b/backend/jottacloud/api/types.go index 83d4db738ec50..fb28d39fa0e47 100644 --- a/backend/jottacloud/api/types.go +++ b/backend/jottacloud/api/types.go @@ -54,6 +54,37 @@ type LoginToken struct { AuthToken string `json:"auth_token"` } +// WellKnown contains some configuration parameters for setting up endpoints +type WellKnown struct { + Issuer string `json:"issuer"` + AuthorizationEndpoint string `json:"authorization_endpoint"` + TokenEndpoint string `json:"token_endpoint"` + TokenIntrospectionEndpoint string `json:"token_introspection_endpoint"` + UserinfoEndpoint string `json:"userinfo_endpoint"` + EndSessionEndpoint string `json:"end_session_endpoint"` + JwksURI string `json:"jwks_uri"` + CheckSessionIframe string `json:"check_session_iframe"` + GrantTypesSupported []string `json:"grant_types_supported"` + ResponseTypesSupported []string `json:"response_types_supported"` + SubjectTypesSupported []string `json:"subject_types_supported"` + IDTokenSigningAlgValuesSupported []string `json:"id_token_signing_alg_values_supported"` + UserinfoSigningAlgValuesSupported []string `json:"userinfo_signing_alg_values_supported"` + RequestObjectSigningAlgValuesSupported []string `json:"request_object_signing_alg_values_supported"` + ResponseNodesSupported []string `json:"response_modes_supported"` + RegistrationEndpoint string `json:"registration_endpoint"` + TokenEndpointAuthMethodsSupported []string `json:"token_endpoint_auth_methods_supported"` + TokenEndpointAuthSigningAlgValuesSupported []string `json:"token_endpoint_auth_signing_alg_values_supported"` + ClaimsSupported []string `json:"claims_supported"` + ClaimTypesSupported []string `json:"claim_types_supported"` + ClaimsParameterSupported bool `json:"claims_parameter_supported"` + ScopesSupported []string `json:"scopes_supported"` + RequestParameterSupported bool `json:"request_parameter_supported"` + RequestURIParameterSupported bool `json:"request_uri_parameter_supported"` + CodeChallengeMethodsSupported []string `json:"code_challenge_methods_supported"` + TLSClientCertificateBoundAccessTokens bool `json:"tls_client_certificate_bound_access_tokens"` + IntrospectionEndpoint string `json:"introspection_endpoint"` +} + // TokenJSON is the struct representing the HTTP response from OAuth2 // providers returning a token in JSON form. 
type TokenJSON struct { diff --git a/backend/jottacloud/jottacloud.go b/backend/jottacloud/jottacloud.go index 91b987a57f038..64c6fff335c01 100644 --- a/backend/jottacloud/jottacloud.go +++ b/backend/jottacloud/jottacloud.go @@ -49,10 +49,11 @@ const ( rootURL = "https://www.jottacloud.com/jfs/" apiURL = "https://api.jottacloud.com/" baseURL = "https://www.jottacloud.com/" - tokenURL = "https://id.jottacloud.com/auth/realms/jottacloud/protocol/openid-connect/token" + defaultTokenURL = "https://id.jottacloud.com/auth/realms/jottacloud/protocol/openid-connect/token" cachePrefix = "rclone-jcmd5-" configDevice = "device" configMountpoint = "mountpoint" + configTokenURL = "tokenURL" configVersion = 1 ) @@ -61,8 +62,8 @@ var ( oauthConfig = &oauth2.Config{ ClientID: "jottacli", Endpoint: oauth2.Endpoint{ - AuthURL: tokenURL, - TokenURL: tokenURL, + AuthURL: defaultTokenURL, + TokenURL: defaultTokenURL, }, RedirectURL: oauthutil.RedirectLocalhostURL, } @@ -85,8 +86,6 @@ func init() { log.Fatalf("Failed to parse config version - corrupted config") } refresh = ver != configVersion - } else { - refresh = true } if refresh { @@ -109,7 +108,7 @@ func init() { fmt.Printf("Login Token> ") loginToken := config.ReadLine() - token, err := doAuth(ctx, srv, loginToken) + token, err := doAuth(ctx, srv, loginToken, m) if err != nil { log.Fatalf("Failed to get oauth token: %s", err) } @@ -244,12 +243,13 @@ func shouldRetry(resp *http.Response, err error) (bool, error) { } // doAuth runs the actual token request -func doAuth(ctx context.Context, srv *rest.Client, loginTokenBase64 string) (token oauth2.Token, err error) { +func doAuth(ctx context.Context, srv *rest.Client, loginTokenBase64 string, m configmap.Mapper) (token oauth2.Token, err error) { loginTokenBytes, err := base64.StdEncoding.DecodeString(loginTokenBase64) if err != nil { return token, err } + // decode login token var loginToken api.LoginToken decoder := json.NewDecoder(bytes.NewReader(loginTokenBytes)) err = decoder.Decode(&loginToken) @@ -257,17 +257,22 @@ func doAuth(ctx context.Context, srv *rest.Client, loginTokenBase64 string) (tok return token, err } - // we don't seem to need any data from this link but the API is not happy if skip it + // retrieve endpoint urls opts := rest.Opts{ - Method: "GET", - RootURL: loginToken.WellKnownLink, - NoResponse: true, + Method: "GET", + RootURL: loginToken.WellKnownLink, } - _, err = srv.Call(ctx, &opts) + var wellKnown api.WellKnown + _, err = srv.CallJSON(ctx, &opts, nil, &wellKnown) if err != nil { return token, err } + // save the tokenurl + oauthConfig.Endpoint.AuthURL = wellKnown.TokenEndpoint + oauthConfig.Endpoint.TokenURL = wellKnown.TokenEndpoint + m.Set(configTokenURL, wellKnown.TokenEndpoint) + // prepare out token request with username and password values := url.Values{} values.Set("client_id", "jottacli") @@ -459,6 +464,7 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) { return nil, err } + // Check config version var ok bool var version string if version, ok = m.Get("configVersion"); ok { @@ -472,15 +478,23 @@ func NewFs(name, root string, m configmap.Mapper) (fs.Fs, error) { return nil, errors.New("Outdated config - please reconfigure this backend") } - rootIsDir := strings.HasSuffix(root, "/") - root = parsePath(root) + // if custome endpoints are set use them else stick with defaults + if tokenURL, ok := m.Get(configTokenURL); ok { + oauthConfig.Endpoint.TokenURL = tokenURL + // jottacloud is weird. 
we need to use the tokenURL as authURL + oauthConfig.Endpoint.AuthURL = tokenURL + } + // Create OAuth Client baseClient := fshttp.NewClient(fs.Config) oAuthClient, ts, err := oauthutil.NewClientWithBaseClient(name, m, oauthConfig, baseClient) if err != nil { return nil, errors.Wrap(err, "Failed to configure Jottacloud oauth client") } + rootIsDir := strings.HasSuffix(root, "/") + root = parsePath(root) + f := &Fs{ name: name, root: root, From 69ac04fec939689f9724a9a3c8dcb8a18484d7d8 Mon Sep 17 00:00:00 2001 From: buengese Date: Sun, 29 Dec 2019 18:29:07 +0100 Subject: [PATCH 43/47] docs: add GetSky to list of supported providers --- README.md | 1 + docs/content/docs.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index bfaaffc63c50d..b40ab25370e69 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Rclone *("rsync for cloud storage")* is a command line program to sync files and * Dreamhost [:page_facing_up:](https://rclone.org/s3/#dreamhost) * Dropbox [:page_facing_up:](https://rclone.org/dropbox/) * FTP [:page_facing_up:](https://rclone.org/ftp/) + * GetSky [:page_facing_up:](https://rclone.org/jottacloud/) * Google Cloud Storage [:page_facing_up:](https://rclone.org/googlecloudstorage/) * Google Drive [:page_facing_up:](https://rclone.org/drive/) * Google Photos [:page_facing_up:](https://rclone.org/googlephotos/) diff --git a/docs/content/docs.md b/docs/content/docs.md index b7c1948016ed0..5432af724a1eb 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -37,7 +37,7 @@ See the following for detailed instructions for * [Google Photos](/googlephotos/) * [HTTP](/http/) * [Hubic](/hubic/) - * [Jottacloud](/jottacloud/) + * [Jottacloud / GetSky.no](/jottacloud/) * [Koofr](/koofr/) * [Mail.ru Cloud](/mailru/) * [Mega](/mega/) From 0e64df4b4c0218ad269698e80f4650a4644161f7 Mon Sep 17 00:00:00 2001 From: Aleksandar Jankovic Date: Thu, 19 Dec 2019 10:28:35 +0100 Subject: [PATCH 44/47] fs/accounting: consistency cleanup --- fs/accounting/stats_groups.go | 138 ++++++++++++++++++---------------- 1 file changed, 73 insertions(+), 65 deletions(-) diff --git a/fs/accounting/stats_groups.go b/fs/accounting/stats_groups.go index d905bc3abdecd..96c54276b73e5 100644 --- a/fs/accounting/stats_groups.go +++ b/fs/accounting/stats_groups.go @@ -13,7 +13,15 @@ const globalStats = "global_stats" var groups *statsGroups -func listStats(ctx context.Context, in rc.Params) (rc.Params, error) { +func init() { + // Init stats container + groups = newStatsGroups() + + // Set the function pointer up in fs + fs.CountError = GlobalStats().Error +} + +func rcListStats(ctx context.Context, in rc.Params) (rc.Params, error) { out := make(rc.Params) out["groups"] = groups.names() @@ -21,65 +29,46 @@ func listStats(ctx context.Context, in rc.Params) (rc.Params, error) { return out, nil } -func remoteStats(ctx context.Context, in rc.Params) (rc.Params, error) { - // Check to see if we should filter by group. - group, err := in.GetString("group") - if rc.NotErrParamNotFound(err) { - return rc.Params{}, err - } - if group != "" { - return StatsGroup(group).RemoteStats() - } +func init() { + rc.Add(rc.Call{ + Path: "core/group-list", + Fn: rcListStats, + Title: "Returns list of stats.", + Help: ` +This returns list of stats groups currently in memory. - return groups.sum().RemoteStats() +Returns the following values: +` + "```" + ` +{ + "groups": an array of group names: + [ + "group1", + "group2", + ... 
+ ] } - -func transferredStats(ctx context.Context, in rc.Params) (rc.Params, error) { - // Check to see if we should filter by group. - group, err := in.GetString("group") - if rc.NotErrParamNotFound(err) { - return rc.Params{}, err - } - - out := make(rc.Params) - if group != "" { - out["transferred"] = StatsGroup(group).Transferred() - } else { - out["transferred"] = groups.sum().Transferred() - } - - return out, nil +` + "```" + ` +`, + }) } -func resetStats(ctx context.Context, in rc.Params) (rc.Params, error) { +func rcRemoteStats(ctx context.Context, in rc.Params) (rc.Params, error) { // Check to see if we should filter by group. group, err := in.GetString("group") if rc.NotErrParamNotFound(err) { return rc.Params{}, err } - if group != "" { - stats := groups.get(group) - stats.ResetCounters() - stats.ResetErrors() - stats.PruneAllTransfers() - } else { - groups.clear() + return StatsGroup(group).RemoteStats() } - return rc.Params{}, nil + return groups.sum().RemoteStats() } func init() { - // Init stats container - groups = newStatsGroups() - - // Set the function pointer up in fs - fs.CountError = GlobalStats().Error - rc.Add(rc.Call{ Path: "core/stats", - Fn: remoteStats, + Fn: rcRemoteStats, Title: "Returns stats about current transfers.", Help: ` This returns all available stats: @@ -127,10 +116,29 @@ Values for "transferring", "checking" and "lastError" are only assigned if data The value for "eta" is null if an eta cannot be determined. `, }) +} +func rcTransferredStats(ctx context.Context, in rc.Params) (rc.Params, error) { + // Check to see if we should filter by group. + group, err := in.GetString("group") + if rc.NotErrParamNotFound(err) { + return rc.Params{}, err + } + + out := make(rc.Params) + if group != "" { + out["transferred"] = StatsGroup(group).Transferred() + } else { + out["transferred"] = groups.sum().Transferred() + } + + return out, nil +} + +func init() { rc.Add(rc.Call{ Path: "core/transferred", - Fn: transferredStats, + Fn: rcTransferredStats, Title: "Returns stats about completed transfers.", Help: ` This returns stats about completed transfers: @@ -165,31 +173,31 @@ Returns the following values: ` + "```" + ` `, }) +} - rc.Add(rc.Call{ - Path: "core/group-list", - Fn: listStats, - Title: "Returns list of stats.", - Help: ` -This returns list of stats groups currently in memory. +func rcResetStats(ctx context.Context, in rc.Params) (rc.Params, error) { + // Check to see if we should filter by group. + group, err := in.GetString("group") + if rc.NotErrParamNotFound(err) { + return rc.Params{}, err + } -Returns the following values: -` + "```" + ` -{ - "groups": an array of group names: - [ - "group1", - "group2", - ... - ] + if group != "" { + stats := groups.get(group) + stats.ResetCounters() + stats.ResetErrors() + stats.PruneAllTransfers() + } else { + groups.reset() + } + + return rc.Params{}, nil } -` + "```" + ` -`, - }) +func init() { rc.Add(rc.Call{ Path: "core/stats-reset", - Fn: resetStats, + Fn: rcResetStats, Title: "Reset stats.", Help: ` This clears counters, errors and finished transfers for all stats or specific @@ -304,7 +312,7 @@ func (sg *statsGroups) names() []string { return sg.order } -// get gets the stats for group, or nil if not found +// sum returns aggregate stats that contains summation of all groups. 
func (sg *statsGroups) sum() *StatsInfo { sg.mu.Lock() defer sg.mu.Unlock() @@ -328,7 +336,7 @@ func (sg *statsGroups) sum() *StatsInfo { return sum } -func (sg *statsGroups) clear() { +func (sg *statsGroups) reset() { sg.mu.Lock() defer sg.mu.Unlock() From b9fb313f71980bdd9b6a33e8396f40b0442cb2e7 Mon Sep 17 00:00:00 2001 From: Aleksandar Jankovic Date: Thu, 19 Dec 2019 11:16:22 +0100 Subject: [PATCH 45/47] fs/accounting: add option to delete stats Removed PruneAllTransfers because it had no use. startedTransfers are set to nil in ResetCounters. --- fs/accounting/stats.go | 14 ---- fs/accounting/stats_groups.go | 59 ++++++++++++++-- fs/accounting/stats_groups_test.go | 104 +++++++++++++++++++++++++++++ fs/accounting/stats_test.go | 22 ------ 4 files changed, 158 insertions(+), 41 deletions(-) create mode 100644 fs/accounting/stats_groups_test.go diff --git a/fs/accounting/stats.go b/fs/accounting/stats.go index 9f75026103efc..0ed536b147737 100644 --- a/fs/accounting/stats.go +++ b/fs/accounting/stats.go @@ -634,20 +634,6 @@ func (s *StatsInfo) RemoveTransfer(transfer *Transfer) { s.mu.Unlock() } -// PruneAllTransfers removes all finished transfers. -func (s *StatsInfo) PruneAllTransfers() { - s.mu.Lock() - for i := 0; i < len(s.startedTransfers); i++ { - tr := s.startedTransfers[i] - if tr.IsDone() { - s.removeTransfer(tr, i) - // i'th element is removed, recover iterator to not skip next element. - i-- - } - } - s.mu.Unlock() -} - // PruneTransfers makes sure there aren't too many old transfers by removing // single finished transfer. func (s *StatsInfo) PruneTransfers() { diff --git a/fs/accounting/stats_groups.go b/fs/accounting/stats_groups.go index 96c54276b73e5..c9606105289f8 100644 --- a/fs/accounting/stats_groups.go +++ b/fs/accounting/stats_groups.go @@ -184,9 +184,8 @@ func rcResetStats(ctx context.Context, in rc.Params) (rc.Params, error) { if group != "" { stats := groups.get(group) - stats.ResetCounters() stats.ResetErrors() - stats.PruneAllTransfers() + stats.ResetCounters() } else { groups.reset() } @@ -210,6 +209,35 @@ Parameters }) } +func rcDeleteStats(ctx context.Context, in rc.Params) (rc.Params, error) { + // Group name required because we only do single group. + group, err := in.GetString("group") + if rc.NotErrParamNotFound(err) { + return rc.Params{}, err + } + + if group != "" { + groups.delete(group) + } + + return rc.Params{}, nil +} + +func init() { + rc.Add(rc.Call{ + Path: "core/stats-delete", + Fn: rcDeleteStats, + Title: "Delete stats group.", + Help: ` +This deletes entire stats group + +Parameters + +- group - name of the stats group (string) +`, + }) +} + type statsGroupCtx int64 const statsGroupKey statsGroupCtx = 1 @@ -282,13 +310,13 @@ func (sg *statsGroups) set(group string, stats *StatsInfo) { // Limit number of groups kept in memory. 
if len(sg.order) >= fs.Config.MaxStatsGroups { group := sg.order[0] - fs.LogPrintf(fs.LogLevelInfo, nil, "Max number of stats groups reached removing %s", group) + //fs.LogPrintf(fs.LogLevelInfo, nil, "Max number of stats groups reached removing %s", group) delete(sg.m, group) r := (len(sg.order) - fs.Config.MaxStatsGroups) + 1 sg.order = sg.order[r:] } - // Exclude global stats from + // Exclude global stats from listing if group != globalStats { sg.order = append(sg.order, group) } @@ -343,9 +371,30 @@ func (sg *statsGroups) reset() { for _, stats := range sg.m { stats.ResetErrors() stats.ResetCounters() - stats.PruneAllTransfers() } sg.m = make(map[string]*StatsInfo) sg.order = nil } + +// delete removes all references to the group. +func (sg *statsGroups) delete(group string) { + sg.mu.Lock() + defer sg.mu.Unlock() + stats := sg.m[group] + if stats == nil { + return + } + stats.ResetErrors() + stats.ResetCounters() + delete(sg.m, group) + + // Remove group reference from the ordering slice. + tmp := sg.order[:0] + for _, g := range sg.order { + if g != group { + tmp = append(tmp, g) + } + } + sg.order = tmp +} diff --git a/fs/accounting/stats_groups_test.go b/fs/accounting/stats_groups_test.go new file mode 100644 index 0000000000000..b61f6fabdb96b --- /dev/null +++ b/fs/accounting/stats_groups_test.go @@ -0,0 +1,104 @@ +package accounting + +import ( + "fmt" + "runtime" + "testing" +) + +func TestStatsGroupOperations(t *testing.T) { + + t.Run("empty group returns nil", func(t *testing.T) { + t.Parallel() + sg := newStatsGroups() + sg.get("invalid-group") + }) + + t.Run("set assigns stats to group", func(t *testing.T) { + t.Parallel() + stats := NewStats() + sg := newStatsGroups() + sg.set("test", stats) + sg.set("test1", stats) + if len(sg.m) != len(sg.names()) || len(sg.m) != 2 { + t.Fatalf("Expected two stats got %d, %d", len(sg.m), len(sg.order)) + } + }) + + t.Run("get returns correct group", func(t *testing.T) { + t.Parallel() + stats := NewStats() + sg := newStatsGroups() + sg.set("test", stats) + sg.set("test1", stats) + got := sg.get("test") + if got != stats { + t.Fatal("get returns incorrect stats") + } + }) + + t.Run("sum returns correct values", func(t *testing.T) { + t.Parallel() + stats1 := NewStats() + stats1.bytes = 5 + stats1.errors = 5 + stats2 := NewStats() + sg := newStatsGroups() + sg.set("test1", stats1) + sg.set("test2", stats2) + sum := sg.sum() + if sum.bytes != stats1.bytes+stats2.bytes { + t.Fatalf("sum() => bytes %d, expected %d", sum.bytes, stats1.bytes+stats2.bytes) + } + if sum.errors != stats1.errors+stats2.errors { + t.Fatalf("sum() => errors %d, expected %d", sum.errors, stats1.errors+stats2.errors) + } + }) + + t.Run("delete removes stats", func(t *testing.T) { + t.Parallel() + stats := NewStats() + sg := newStatsGroups() + sg.set("test", stats) + sg.set("test1", stats) + sg.delete("test1") + if sg.get("test1") != nil { + t.Fatal("stats not deleted") + } + if len(sg.m) != len(sg.names()) || len(sg.m) != 1 { + t.Fatalf("Expected two stats got %d, %d", len(sg.m), len(sg.order)) + } + }) + + t.Run("memory is reclaimed", func(t *testing.T) { + var ( + count = 1000 + start, end runtime.MemStats + sg = newStatsGroups() + ) + + runtime.GC() + runtime.ReadMemStats(&start) + + for i := 0; i < count; i++ { + sg.set(fmt.Sprintf("test-%d", i), NewStats()) + } + + for i := 0; i < count; i++ { + sg.delete(fmt.Sprintf("test-%d", i)) + } + + runtime.GC() + runtime.ReadMemStats(&end) + + t.Log(fmt.Sprintf("%+v\n%+v", start, end)) + diff := 
percentDiff(start.HeapObjects, end.HeapObjects) + if diff > 1 || diff < 0 { + t.Errorf("HeapObjects = %d, expected %d", end.HeapObjects, start.HeapObjects) + } + }) +} + +func percentDiff(start, end uint64) uint64 { + return (start - end) * 100 / start +} diff --git a/fs/accounting/stats_test.go b/fs/accounting/stats_test.go index a6acf3fafe165..2e462cb74945f 100644 --- a/fs/accounting/stats_test.go +++ b/fs/accounting/stats_test.go @@ -431,25 +431,3 @@ func TestPruneTransfers(t *testing.T) { }) } } - -func TestPruneAllTransfers(t *testing.T) { - const transfers = 10 - - s := NewStats() - for i := int64(1); i <= int64(transfers); i++ { - s.AddTransfer(&Transfer{ - startedAt: time.Unix(i, 0), - completedAt: time.Unix(i+1, 0), - }) - } - - s.mu.Lock() - assert.Equal(t, transfers, len(s.startedTransfers)) - s.mu.Unlock() - - s.PruneAllTransfers() - - s.mu.Lock() - assert.Empty(t, s.startedTransfers) - s.mu.Unlock() -} From 2e0774f3cfebd4f33a3e3816e519eb3244fb075f Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Fri, 3 Jan 2020 22:18:23 +0000 Subject: [PATCH 46/47] Add Thomas Kriechbaumer to contributors --- docs/content/authors.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/content/authors.md b/docs/content/authors.md index 5c229222da062..d616a46c93de8 100644 --- a/docs/content/authors.md +++ b/docs/content/authors.md @@ -318,3 +318,4 @@ Contributors * David Cole * Wei He * Outvi V <19144373+outloudvi@users.noreply.github.com> + * Thomas Kriechbaumer From 7e6fac8b1eaeccbf48e197e2c1f12627c3e69ca5 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 30 Dec 2019 23:17:06 +0000 Subject: [PATCH 47/47] s3: re-implement multipart upload to fix memory issues There have been quite a few reports of problems with the multipart uploader using too much memory and not retrying possible errors. Before this change the multipart uploader used the s3manager abstraction in the AWS SDK. There are numerous bug reports of this using up too much memory. This change re-implements a much simplified version of the s3manager code specialized for rclone's purposes. This should use much less memory and retry chunks properly. 
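The memory control in the new uploader comes from a buffered channel that acts both as a concurrency limiter and as a buffer pool, so at most `--s3-upload-concurrency` buffers of roughly `--s3-chunk-size` bytes are ever held at once. A stripped-down sketch of that pattern (not the actual uploader code):

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	const (
		concurrency = 4               // stand-in for --s3-upload-concurrency
		partSize    = 5 * 1024 * 1024 // stand-in for --s3-chunk-size
		parts       = 10
	)

	// Taking a buffer from the channel blocks while `concurrency` parts are
	// already in flight, which bounds both memory use and parallelism.
	bufs := make(chan []byte, concurrency)
	for i := 0; i < concurrency; i++ {
		bufs <- nil // buffers are allocated lazily on first use
	}

	var wg sync.WaitGroup
	for part := 1; part <= parts; part++ {
		buf := <-bufs
		if buf == nil {
			buf = make([]byte, partSize)
		}
		wg.Add(1)
		go func(part int, buf []byte) {
			defer wg.Done()
			// the real code uploads buf as part number `part` here
			fmt.Println("uploaded part", part, "of", len(buf), "bytes")
			bufs <- buf // return the buffer for reuse by the next part
		}(part, buf)
	}
	wg.Wait()
}
```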
See: https://forum.rclone.org/t/memory-usage-s3-alike-to-glacier-without-big-directories/13563 See: https://forum.rclone.org/t/copy-from-local-to-s3-has-high-memory-usage/13405 See: https://forum.rclone.org/t/big-file-upload-to-s3-fails/13575 --- backend/s3/s3.go | 283 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 215 insertions(+), 68 deletions(-) diff --git a/backend/s3/s3.go b/backend/s3/s3.go index 4b7b93a276e51..4aa3bd8a08cb2 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -14,7 +14,9 @@ What happens if you CTRL-C a multipart upload */ import ( + "bytes" "context" + "crypto/md5" "encoding/base64" "encoding/hex" "encoding/xml" @@ -24,6 +26,7 @@ import ( "net/url" "path" "regexp" + "sort" "strconv" "strings" "sync" @@ -39,7 +42,6 @@ import ( "github.com/aws/aws-sdk-go/aws/request" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/s3" - "github.com/aws/aws-sdk-go/service/s3/s3manager" "github.com/ncw/swift" "github.com/pkg/errors" "github.com/rclone/rclone/fs" @@ -52,7 +54,9 @@ import ( "github.com/rclone/rclone/fs/walk" "github.com/rclone/rclone/lib/bucket" "github.com/rclone/rclone/lib/pacer" + "github.com/rclone/rclone/lib/readers" "github.com/rclone/rclone/lib/rest" + "golang.org/x/sync/errgroup" ) const enc = encodings.S3 @@ -814,7 +818,8 @@ const ( metaMD5Hash = "Md5chksum" // the meta key to store md5hash in maxRetries = 10 // number of retries to make of operations maxSizeForCopy = 5 * 1024 * 1024 * 1024 // The maximum size of object we can COPY - minChunkSize = fs.SizeSuffix(s3manager.MinUploadPartSize) + maxUploadParts = 10000 // maximum allowed number of parts in a multi-part upload + minChunkSize = fs.SizeSuffix(1024 * 1024 * 5) defaultUploadCutoff = fs.SizeSuffix(200 * 1024 * 1024) maxUploadCutoff = fs.SizeSuffix(5 * 1024 * 1024 * 1024) minSleep = 10 * time.Millisecond // In case of error, start at 10ms sleep. @@ -2023,6 +2028,193 @@ func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (in io.Read var warnStreamUpload sync.Once +func (o *Object) uploadMultipart(ctx context.Context, req *s3.PutObjectInput, size int64, in io.Reader) (err error) { + f := o.fs + + // make concurrency machinery + concurrency := f.opt.UploadConcurrency + if concurrency < 1 { + concurrency = 1 + } + bufs := make(chan []byte, concurrency) + defer func() { + // empty the channel on exit + close(bufs) + for range bufs { + } + }() + for i := 0; i < concurrency; i++ { + bufs <- nil + } + + // calculate size of parts + partSize := int(f.opt.ChunkSize) + + // size can be -1 here meaning we don't know the size of the incoming file. We use ChunkSize + // buffers here (default 5MB). With a maximum number of parts (10,000) this will be a file of + // 48GB which seems like a not too unreasonable limit. + if size == -1 { + warnStreamUpload.Do(func() { + fs.Logf(f, "Streaming uploads using chunk size %v will have maximum file size of %v", + f.opt.ChunkSize, fs.SizeSuffix(partSize*maxUploadParts)) + }) + } else { + // Adjust partSize until the number of parts is small enough. 
+ if size/int64(partSize) >= maxUploadParts { + // Calculate partition size rounded up to the nearest MB + partSize = int((((size / maxUploadParts) >> 20) + 1) << 20) + } + } + + var cout *s3.CreateMultipartUploadOutput + err = f.pacer.Call(func() (bool, error) { + var err error + cout, err = f.c.CreateMultipartUploadWithContext(ctx, &s3.CreateMultipartUploadInput{ + Bucket: req.Bucket, + ACL: req.ACL, + Key: req.Key, + ContentType: req.ContentType, + Metadata: req.Metadata, + ServerSideEncryption: req.ServerSideEncryption, + SSEKMSKeyId: req.SSEKMSKeyId, + StorageClass: req.StorageClass, + }) + return f.shouldRetry(err) + }) + if err != nil { + return errors.Wrap(err, "multipart upload failed to initialise") + } + uid := cout.UploadId + + defer func() { + if o.fs.opt.LeavePartsOnError { + return + } + if err != nil { + // We can try to abort the upload, but ignore the error. + fs.Debugf(o, "Cancelling multipart upload") + errCancel := f.pacer.Call(func() (bool, error) { + _, err := f.c.AbortMultipartUploadWithContext(ctx, &s3.AbortMultipartUploadInput{ + Bucket: req.Bucket, + Key: req.Key, + UploadId: uid, + RequestPayer: req.RequestPayer, + }) + return f.shouldRetry(err) + }) + if errCancel != nil { + fs.Debugf(o, "Failed to cancel multipart upload: %v", errCancel) + } + } + }() + + var ( + g, gCtx = errgroup.WithContext(ctx) + finished = false + partsMu sync.Mutex // to protect parts + parts []*s3.CompletedPart + off int64 + ) + + for partNum := int64(1); !finished; partNum++ { + // Get a block of memory from the channel (which limits concurrency) + buf := <-bufs + if buf == nil { + buf = make([]byte, partSize) + } + + // Read the chunk + var n int + n, err = readers.ReadFill(in, buf) // this can never return 0, nil + if err == io.EOF { + if n == 0 { + break + } + finished = true + } else if err != nil { + return errors.Wrap(err, "multipart upload failed to read source") + } + buf = buf[:n] + + partNum := partNum + fs.Debugf(o, "multipart upload starting chunk %d size %v offset %v/%v", partNum, fs.SizeSuffix(n), fs.SizeSuffix(off), fs.SizeSuffix(size)) + off += int64(n) + g.Go(func() (err error) { + partLength := int64(len(buf)) + + // create checksum of buffer for integrity checking + md5sumBinary := md5.Sum(buf) + md5sum := base64.StdEncoding.EncodeToString(md5sumBinary[:]) + + err = f.pacer.Call(func() (bool, error) { + uploadPartReq := &s3.UploadPartInput{ + Body: bytes.NewReader(buf), + Bucket: req.Bucket, + Key: req.Key, + PartNumber: &partNum, + UploadId: uid, + ContentMD5: &md5sum, + ContentLength: &partLength, + RequestPayer: req.RequestPayer, + SSECustomerAlgorithm: req.SSECustomerAlgorithm, + SSECustomerKey: req.SSECustomerKey, + SSECustomerKeyMD5: req.SSECustomerKeyMD5, + } + uout, err := f.c.UploadPartWithContext(gCtx, uploadPartReq) + if err != nil { + if partNum <= int64(concurrency) { + return f.shouldRetry(err) + } + // retry all chunks once have done the first batch + return true, err + } + partsMu.Lock() + parts = append(parts, &s3.CompletedPart{ + PartNumber: &partNum, + ETag: uout.ETag, + }) + partsMu.Unlock() + + return false, nil + }) + + // return the memory + bufs <- buf[:partSize] + + if err != nil { + return errors.Wrap(err, "multipart upload failed to upload part") + } + return nil + }) + } + err = g.Wait() + if err != nil { + return err + } + + // sort the completed parts by part number + sort.Slice(parts, func(i, j int) bool { + return *parts[i].PartNumber < *parts[j].PartNumber + }) + + err = f.pacer.Call(func() (bool, error) { + _, err := 
+    err = g.Wait()
+    if err != nil {
+        return err
+    }
+
+    // sort the completed parts by part number
+    sort.Slice(parts, func(i, j int) bool {
+        return *parts[i].PartNumber < *parts[j].PartNumber
+    })
+
+    err = f.pacer.Call(func() (bool, error) {
+        _, err := f.c.CompleteMultipartUploadWithContext(ctx, &s3.CompleteMultipartUploadInput{
+            Bucket: req.Bucket,
+            Key: req.Key,
+            MultipartUpload: &s3.CompletedMultipartUpload{
+                Parts: parts,
+            },
+            RequestPayer: req.RequestPayer,
+            UploadId: uid,
+        })
+        return f.shouldRetry(err)
+    })
+    if err != nil {
+        return errors.Wrap(err, "multipart upload failed to finalise")
+    }
+    return nil
+}
+
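The 48GB figure in the comment above is just chunk size times the part limit: S3 allows at most 10,000 parts per upload (the maxUploadParts constant), so a streamed upload of unknown length with the default 5 MiB chunk size tops out at about 48.8 GiB, and a larger chunk size raises that ceiling proportionally. When the size is known, the code grows the part size instead, rounded up to the next whole MiB. A rough standalone check of that arithmetic (illustrative only, not code from this patch):

package main

import "fmt"

const (
    maxUploadParts = 10000           // S3's limit on parts per multipart upload
    chunkSize      = 5 * 1024 * 1024 // default 5 MiB chunk size
)

func main() {
    // Unknown-size (streamed) uploads are stuck with chunkSize parts, so the
    // largest possible object is chunkSize * maxUploadParts.
    fmt.Printf("max streamed object: %.1f GiB\n",
        float64(chunkSize)*maxUploadParts/(1<<30)) // ~48.8 GiB

    // Known-size uploads grow the part size, rounded up to a whole MiB,
    // mirroring: partSize = int((((size / maxUploadParts) >> 20) + 1) << 20)
    size := int64(100) << 30 // say, a 100 GiB object
    partSize := (((size / maxUploadParts) >> 20) + 1) << 20
    fmt.Printf("100 GiB object: %d MiB parts, %d parts\n",
        partSize>>20, (size+partSize-1)/partSize)
}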
 // Update the Object from in with modTime and size
 func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error {
     bucket, bucketPath := o.split()
@@ -2034,31 +2226,6 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
     size := src.Size()
     multipart := size < 0 || size >= int64(o.fs.opt.UploadCutoff)

-    var uploader *s3manager.Uploader
-    if multipart {
-        uploader = s3manager.NewUploader(o.fs.ses, func(u *s3manager.Uploader) {
-            u.Concurrency = o.fs.opt.UploadConcurrency
-            u.LeavePartsOnError = o.fs.opt.LeavePartsOnError
-            u.S3 = o.fs.c
-            u.PartSize = int64(o.fs.opt.ChunkSize)
-
-            // size can be -1 here meaning we don't know the size of the incoming file. We use ChunkSize
-            // buffers here (default 5MB). With a maximum number of parts (10,000) this will be a file of
-            // 48GB which seems like a not too unreasonable limit.
-            if size == -1 {
-                warnStreamUpload.Do(func() {
-                    fs.Logf(o.fs, "Streaming uploads using chunk size %v will have maximum file size of %v",
-                        o.fs.opt.ChunkSize, fs.SizeSuffix(u.PartSize*s3manager.MaxUploadParts))
-                })
-                return
-            }
-            // Adjust PartSize until the number of parts is small enough.
-            if size/u.PartSize >= s3manager.MaxUploadParts {
-                // Calculate partition size rounded up to the nearest MB
-                u.PartSize = (((size / s3manager.MaxUploadParts) >> 20) + 1) << 20
-            }
-        })
-    }

     // Set the mtime in the meta data
     metadata := map[string]*string{
@@ -2083,52 +2250,32 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
     // Guess the content type
     mimeType := fs.MimeType(ctx, src)

+    req := s3.PutObjectInput{
+        Bucket: &bucket,
+        ACL: &o.fs.opt.ACL,
+        Key: &bucketPath,
+        ContentType: &mimeType,
+        Metadata: metadata,
+    }
+    if md5sum != "" {
+        req.ContentMD5 = &md5sum
+    }
+    if o.fs.opt.ServerSideEncryption != "" {
+        req.ServerSideEncryption = &o.fs.opt.ServerSideEncryption
+    }
+    if o.fs.opt.SSEKMSKeyID != "" {
+        req.SSEKMSKeyId = &o.fs.opt.SSEKMSKeyID
+    }
+    if o.fs.opt.StorageClass != "" {
+        req.StorageClass = &o.fs.opt.StorageClass
+    }
+
     if multipart {
-        req := s3manager.UploadInput{
-            Bucket: &bucket,
-            ACL: &o.fs.opt.ACL,
-            Key: &bucketPath,
-            Body: in,
-            ContentType: &mimeType,
-            Metadata: metadata,
-            //ContentLength: &size,
-        }
-        if o.fs.opt.ServerSideEncryption != "" {
-            req.ServerSideEncryption = &o.fs.opt.ServerSideEncryption
-        }
-        if o.fs.opt.SSEKMSKeyID != "" {
-            req.SSEKMSKeyId = &o.fs.opt.SSEKMSKeyID
-        }
-        if o.fs.opt.StorageClass != "" {
-            req.StorageClass = &o.fs.opt.StorageClass
-        }
-        err = o.fs.pacer.CallNoRetry(func() (bool, error) {
-            _, err = uploader.UploadWithContext(ctx, &req)
-            return o.fs.shouldRetry(err)
-        })
+        err = o.uploadMultipart(ctx, &req, size, in)
         if err != nil {
             return err
         }
     } else {
-        req := s3.PutObjectInput{
-            Bucket: &bucket,
-            ACL: &o.fs.opt.ACL,
-            Key: &bucketPath,
-            ContentType: &mimeType,
-            Metadata: metadata,
-        }
-        if md5sum != "" {
-            req.ContentMD5 = &md5sum
-        }
-        if o.fs.opt.ServerSideEncryption != "" {
-            req.ServerSideEncryption = &o.fs.opt.ServerSideEncryption
-        }
-        if o.fs.opt.SSEKMSKeyID != "" {
-            req.SSEKMSKeyId = &o.fs.opt.SSEKMSKeyID
-        }
-        if o.fs.opt.StorageClass != "" {
-            req.StorageClass = &o.fs.opt.StorageClass
-        }

         // Create the request
         putObj, _ := o.fs.c.PutObjectRequest(&req)