From ac38c989e06aee2c548435433e80657316b21f11 Mon Sep 17 00:00:00 2001 From: Alex Aizman Date: Sat, 23 Nov 2024 11:37:23 -0500 Subject: [PATCH] err-move-to-virt-dir: do not run filesystem health check; path errors * add 'err-move-to-virt-dir' - type and helpers - do not count it as an io-error; don't run FSHC * add `IsPathErr` and `CheckMvToVirtDir` * with minor refactoring Signed-off-by: Alex Aizman --- ais/target.go | 3 +++ ais/tgtfshc.go | 5 ++++- ais/tgtimpl.go | 6 +++++- ais/tgtobj.go | 14 ++++++++------ cmn/cos/err.go | 45 ++++++++++++++++++++++++++++++++++++++++++++ cmn/cos/err_linux.go | 4 ++++ cmn/cos/ioutils.go | 11 ++--------- cmn/err.go | 5 +++++ core/lfile.go | 5 ++++- core/lom.go | 6 ++++++ xact/base.go | 4 ++-- xact/xs/prefetch.go | 6 +++--- 12 files changed, 91 insertions(+), 23 deletions(-) diff --git a/ais/target.go b/ais/target.go index c50082782c..607ebfe615 100644 --- a/ais/target.go +++ b/ais/target.go @@ -766,6 +766,9 @@ func (t *target) getObject(w http.ResponseWriter, r *http.Request, dpq *dpq, bck t.statsT.IncErr(stats.ErrGetCount) if goi.isIOErr { t.statsT.IncErr(stats.IOErrGetCount) + if cmn.Rom.FastV(4, cos.SmoduleAIS) { + nlog.Warningln("io-error [", err, "]", goi.lom.String()) + } } // handle right here, return nil diff --git a/ais/tgtfshc.go b/ais/tgtfshc.go index 5064e9b25f..61f90f9782 100644 --- a/ais/tgtfshc.go +++ b/ais/tgtfshc.go @@ -5,6 +5,8 @@ package ais import ( + "fmt" + "github.com/NVIDIA/aistore/cmn" "github.com/NVIDIA/aistore/cmn/cos" "github.com/NVIDIA/aistore/cmn/debug" @@ -47,7 +49,8 @@ func (t *target) FSHC(err error, mi *fs.Mountpath, fqn string) { return } - nlog.Errorf("%s: waking up FSHC to check %s, err: %v", t, mi, err) + warn := fmt.Sprintf("%s: waking up FSHC to check %s, err: %v", t, mi, err) + nlog.ErrorDepth(1, warn) // // counting I/O errors on a per mountpath diff --git a/ais/tgtimpl.go b/ais/tgtimpl.go index 7b02bc1b22..3788ec034c 100644 --- a/ais/tgtimpl.go +++ b/ais/tgtimpl.go @@ -167,7 +167,11 @@ func (t *target) GetCold(ctx context.Context, lom *core.LOM, owt cmn.OWT) (ecode if owt != cmn.OwtGetPrefetchLock { lom.Unlock(true) } - nlog.Infoln(t.String()+":", "failed to GET remote", lom.Cname()+":", err, ecode) + if cmn.IsErrFailedTo(err) { + nlog.Warningln(err) + } else { + nlog.Warningln("failed to GET remote", lom.Cname(), "[", err, ecode, "]") + } return ecode, err } diff --git a/ais/tgtobj.go b/ais/tgtobj.go index d2bfccebf1..679b66b5d2 100644 --- a/ais/tgtobj.go +++ b/ais/tgtobj.go @@ -225,8 +225,12 @@ func (poi *putOI) putObject() (ecode int, err error) { rerr: if poi.owt == cmn.OwtPut && poi.restful && !poi.t2t { poi.t.statsT.IncErr(stats.ErrPutCount) - if err != cmn.ErrSkip && !poi.remoteErr && err != io.ErrUnexpectedEOF && !cos.IsRetriableConnErr(err) { + if err != cmn.ErrSkip && !poi.remoteErr && err != io.ErrUnexpectedEOF && + !cos.IsRetriableConnErr(err) && !cos.IsErrMvToVirtDir(err) { poi.t.statsT.IncErr(stats.IOErrPutCount) + if cmn.Rom.FastV(4, cos.SmoduleAIS) { + nlog.Warningln("io-error [", err, "]", poi.loghdr()) + } } } return ecode, err @@ -346,9 +350,7 @@ func (poi *putOI) fini() (ecode int, err error) { // do nothing: lom is already wlocked case cmn.OwtGetPrefetchLock: if !lom.TryLock(true) { - if cmn.Rom.FastV(4, cos.SmoduleAIS) { - nlog.Warningln(poi.loghdr(), "is busy") - } + nlog.Warningln(poi.loghdr(), "is busy") return 0, cmn.ErrSkip // e.g. prefetch can skip it and keep on going } defer lom.Unlock(true) @@ -676,7 +678,7 @@ do: goi.lom.Unlock(true) goi.unlocked = true if !cos.IsNotExist(res.Err, res.ErrCode) { - nlog.Infoln(ftcg+"(read)", goi.lom.Cname(), res.Err, res.ErrCode) + nlog.Infoln(ftcg, "(read)", goi.lom.Cname(), res.Err, res.ErrCode) } return res.ErrCode, res.Err } @@ -742,7 +744,7 @@ func (goi *getOI) _coldPut(res *core.GetReaderResult) (int, error) { if err != nil { lom.Unlock(true) - nlog.Infoln(ftcg+"(put)", lom.Cname(), err) + nlog.Infoln(ftcg, "(put)", lom.Cname(), err) return code, err } diff --git a/cmn/cos/err.go b/cmn/cos/err.go index d1fdc835bf..4827c1a95a 100644 --- a/cmn/cos/err.go +++ b/cmn/cos/err.go @@ -8,10 +8,12 @@ import ( "context" "errors" "fmt" + iofs "io/fs" "net" "net/http" "net/url" "os" + "path/filepath" "sync" ratomic "sync/atomic" "syscall" @@ -32,6 +34,9 @@ type ( cnt int64 mu sync.Mutex } + ErrMvToVirtDir struct { + dst string + } ) var ( @@ -150,6 +155,13 @@ func IsErrSyscallTimeout(err error) bool { return ok && syscallErr.Timeout() } +func IsPathErr(err error) (ok bool) { + if pathErr := (*iofs.PathError)(nil); errors.As(err, &pathErr) { + ok = true + } + return +} + // likely out of socket descriptors func IsErrConnectionNotAvail(err error) (yes bool) { return errors.Is(err, syscall.EADDRNOTAVAIL) @@ -210,3 +222,36 @@ func IsErrClientURLTimeout(err error) bool { uerr := Err2ClientURLErr(err) return uerr != nil && uerr.Timeout() } + +// +// ErrMvToVirtDir +// NOTE [design tradeoff] keeping objects under (e.g.) their respective sha256, etc. +// + +func CheckMvToVirtDir(err error, dst string) error { + if IsErrMvToVirtDir(err) { + return err + } + if os.IsExist(err) { + if finfo, errN := os.Stat(dst); errN == nil && finfo.IsDir() { + return &ErrMvToVirtDir{dst} + } + } + return err +} + +func IsErrMvToVirtDir(err error) bool { + _, ok := err.(*ErrMvToVirtDir) + return ok +} + +func (e *ErrMvToVirtDir) Error() string { + var ( + b = filepath.Base(e.dst) + d string + ) + if l, lb := len(e.dst), len(b); lb > 1 && l > lb+8 { + d = filepath.Base(e.dst[0 : l-lb]) + } + return fmt.Sprintf("destination '../%s/%s' exists and is a virtual directory", d, b) +} diff --git a/cmn/cos/err_linux.go b/cmn/cos/err_linux.go index c2cdab3bd9..fa3e21d517 100644 --- a/cmn/cos/err_linux.go +++ b/cmn/cos/err_linux.go @@ -38,6 +38,10 @@ var ioErrs = [...]error{ func IsIOError(err error) bool { debug.Assert(err != nil) + if IsErrMvToVirtDir(err) { + return false + } + // via os.NewSyscallError(), with a prior check !os.IsNotExist() if e, ok := err.(*os.SyscallError); ok { nlog.Infoln("by syscall-error", e) diff --git a/cmn/cos/ioutils.go b/cmn/cos/ioutils.go index 13df10056d..9013149f67 100644 --- a/cmn/cos/ioutils.go +++ b/cmn/cos/ioutils.go @@ -95,18 +95,11 @@ func Rename(src, dst string) (err error) { return nil } if !os.IsNotExist(err) { - if os.IsExist(err) { - if finfo, errN := os.Stat(dst); errN == nil && finfo.IsDir() { - // [design tradeoff] keeping objects under (e.g.) their respective sha256 - // would eliminate this one, in part - return fmt.Errorf("move destination '../%s' already exists (and is a virtual directory)", filepath.Base(dst)) - } - } - return err + return CheckMvToVirtDir(err, dst) } // create and retry (slow path) err = CreateDir(filepath.Dir(dst)) - if err == nil { + if err == nil || os.IsExist(err) /*race*/ { err = os.Rename(src, dst) } return err diff --git a/cmn/err.go b/cmn/err.go index a832d2b92b..1989907ea9 100644 --- a/cmn/err.go +++ b/cmn/err.go @@ -291,6 +291,11 @@ func (e *ErrFailedTo) Error() string { func (e *ErrFailedTo) Unwrap() (err error) { return e.err } +func IsErrFailedTo(err error) bool { + _, ok := err.(*ErrFailedTo) + return ok +} + // ErrStreamTerminated func NewErrStreamTerminated(stream string, err error, reason, detail string) *ErrStreamTerminated { diff --git a/core/lfile.go b/core/lfile.go index 47d0bda15f..6f0335ef87 100644 --- a/core/lfile.go +++ b/core/lfile.go @@ -66,6 +66,7 @@ func (lom *LOM) _cf(fqn string) (fh *os.File, err error) { return fh, nil } if !os.IsNotExist(err) { + // TODO: cos.CheckMvToVirtDir(err, fqn) T.FSHC(err, lom.Mountpath(), "") return nil, err } @@ -145,7 +146,9 @@ func (lom *LOM) RenameFinalize(wfqn string) error { return &errBdir{cname: lom.Cname(), err: err} } if err := lom.RenameToMain(wfqn); err != nil { - T.FSHC(err, lom.Mountpath(), wfqn) + if !cos.IsErrMvToVirtDir(err) { + T.FSHC(err, lom.Mountpath(), wfqn) + } return cmn.NewErrFailedTo(T, "finalize", lom.Cname(), err) } return nil diff --git a/core/lom.go b/core/lom.go index 36db14f4a8..ee29a19fb5 100644 --- a/core/lom.go +++ b/core/lom.go @@ -557,6 +557,12 @@ func (lom *LOM) FromFS() error { size, atimefs, _, err := lom.Fstat(true /*get-atime*/) if err != nil { if !os.IsNotExist(err) { + if cos.IsPathErr(err) && strings.Contains(err.Error(), "not a directory") { + // e.g. err "stat .../aaa/111: not a directory" when there's existing ".../aaa" object + err := fmt.Errorf("%w (path error)", err) + nlog.Errorln(err) + return err + } err = os.NewSyscallError("stat", err) T.FSHC(err, lom.Mountpath(), lom.FQN) } diff --git a/xact/base.go b/xact/base.go index 3dcb152e5a..45fd6476c2 100644 --- a/xact/base.go +++ b/xact/base.go @@ -339,9 +339,9 @@ func (xctn *Base) Finish() { case err == nil: nlog.Infoln(xctn.String(), "finished") case aborted: - nlog.Warningln(xctn.String(), "aborted:", err.Error(), info) + nlog.Warningln(xctn.String(), "aborted:", err, info) default: - nlog.Infoln("Warning:", xctn.String(), "finished w/err:", err.Error()) + nlog.Warningln(xctn.String(), "finished w/err:", err) } } diff --git a/xact/xs/prefetch.go b/xact/xs/prefetch.go index e7549afe85..a77529c1de 100644 --- a/xact/xs/prefetch.go +++ b/xact/xs/prefetch.go @@ -296,11 +296,11 @@ outer: nlog.Warningln(xname, "::", xblob.String(), "[", msg.String(), err, "]") default: if xblob.Size() >= cos.GiB/2 || cmn.Rom.FastV(4, cos.SmoduleXs) { - var s string if n := int(pebl.num()); n > 0 { - s = " (num-pending " + strconv.Itoa(n) + ")" + nlog.Infoln(xname, "::", xblob.String(), "( num-pending", strconv.Itoa(n), ")") + } else { + nlog.Infoln(xname, "::", xblob.String()) } - nlog.Infoln(xname, "::", xblob.String(), s) } } }