diff --git a/pkg/api/task.go b/pkg/api/task.go index 166e974..639511f 100644 --- a/pkg/api/task.go +++ b/pkg/api/task.go @@ -56,11 +56,13 @@ func findMetadata(w http.ResponseWriter, r *http.Request) { x := r.URL.Query().Get("x") ehdl := r.URL.Query().Get("ehdl") hath := r.URL.Query().Get("hath") + fuzzy := r.URL.Query().Get("fuzzy") metaTypes := make(map[metadata.MetaType]bool) metaTypes[metadata.XMeta] = x == "true" metaTypes[metadata.EHDLMeta] = ehdl == "true" metaTypes[metadata.HathMeta] = hath == "true" + metaTypes[metadata.FuzzyMatch] = fuzzy == "true" go metadata.ParseMetadata(metaTypes) if title == "true" { diff --git a/pkg/db/user.go b/pkg/db/user.go index b89b82f..a59daf8 100644 --- a/pkg/db/user.go +++ b/pkg/db/user.go @@ -5,8 +5,8 @@ import ( "database/sql" "encoding/base64" "github.com/Mangatsu/server/pkg/log" - "github.com/Mangatsu/server/pkg/types/model" - . "github.com/Mangatsu/server/pkg/types/table" + "github.com/Mangatsu/server/pkg/types/sqlite/model" + . "github.com/Mangatsu/server/pkg/types/sqlite/table" "github.com/Mangatsu/server/pkg/utils" . "github.com/go-jet/jet/v2/sqlite" "github.com/google/uuid" diff --git a/pkg/library/scan.go b/pkg/library/scan.go index 61c7b96..1d2dc0c 100644 --- a/pkg/library/scan.go +++ b/pkg/library/scan.go @@ -17,6 +17,15 @@ import ( func countImages(archivePath string) (uint64, error) { filesystem, err := archiver.FileSystem(nil, archivePath) + if err != nil { + log.Z.Error("could not open archive", + zap.String("path", archivePath), + zap.String("err", err.Error()), + ) + + return 0, err + } + var fileCount uint64 err = fs.WalkDir(filesystem, ".", func(s string, d fs.DirEntry, err error) error { diff --git a/pkg/metadata/ehdl.go b/pkg/metadata/ehdl.go index b8eef34..c416722 100644 --- a/pkg/metadata/ehdl.go +++ b/pkg/metadata/ehdl.go @@ -2,7 +2,7 @@ package metadata import ( "bufio" - "os" + "bytes" "regexp" "strconv" "strings" @@ -17,24 +17,17 @@ var sizeRegex = regexp.MustCompile(`File Size:\s*(\d+(?:\.\d+)?)`) // ParseEHDL parses given text file. Input file is expected to be in the H@H (Hath) format (galleryinfo.txt). // Input file is expected to be in the E-Hentai-Downloader format (info.txt). -func ParseEHDL(filePath string) (model.Gallery, []model.Tag, error) { - file, err := os.Open(filePath) +func ParseEHDL(metaPath string, metaData []byte, internal bool) (model.Gallery, []model.Tag, model.Reference, error) { gallery := model.Gallery{} - reference := model.Reference{} - var tags []model.Tag - - if err != nil { - return gallery, nil, err + reference := model.Reference{ + MetaPath: &metaPath, + MetaInternal: internal, + Urls: nil, } + var tags []model.Tag - defer func(file *os.File) { - err := file.Close() - if err != nil { - log.Z.Debug("failed to close EHDL formatted file", zap.String("err", err.Error())) - } - }(file) - - scanner := bufio.NewScanner(file) + buffer := bytes.NewBuffer(metaData) + scanner := bufio.NewScanner(buffer) lineNumber := -1 for scanner.Scan() { @@ -126,5 +119,5 @@ func ParseEHDL(filePath string) (model.Gallery, []model.Tag, error) { } } - return gallery, tags, nil + return gallery, tags, reference, nil } diff --git a/pkg/metadata/hath.go b/pkg/metadata/hath.go index 9cff9bb..07f5b8c 100644 --- a/pkg/metadata/hath.go +++ b/pkg/metadata/hath.go @@ -2,30 +2,24 @@ package metadata import ( "bufio" - "os" + "bytes" "strings" "github.com/Mangatsu/server/pkg/types/sqlite/model" ) // ParseHath parses given text file. Input file is expected to be in the H@H (Hath) format (galleryinfo.txt). -func ParseHath(filePath string) (model.Gallery, []model.Tag, error) { - file, err := os.Open(filePath) +func ParseHath(metaPath string, metaData []byte, internal bool) (model.Gallery, []model.Tag, model.Reference, error) { gallery := model.Gallery{} - var tags []model.Tag - - if err != nil { - return gallery, nil, err + reference := model.Reference{ + MetaPath: &metaPath, + MetaInternal: internal, + Urls: nil, } + var tags []model.Tag - defer func(file *os.File) { - err := file.Close() - if err != nil { - log.Z.Debug("failed to close Hath formatted file", zap.String("err", err.Error())) - } - }(file) - - scanner := bufio.NewScanner(file) + buffer := bytes.NewBuffer(metaData) + scanner := bufio.NewScanner(buffer) for scanner.Scan() { line := scanner.Text() @@ -59,5 +53,5 @@ func ParseHath(filePath string) (model.Gallery, []model.Tag, error) { } } - return gallery, tags, nil + return gallery, tags, reference, nil } diff --git a/pkg/metadata/scan.go b/pkg/metadata/scan.go index 5a4cc2b..374eaea 100644 --- a/pkg/metadata/scan.go +++ b/pkg/metadata/scan.go @@ -22,11 +22,17 @@ import ( type MetaType string const ( - XMeta MetaType = "xmeta" - HathMeta = "hathmeta" - EHDLMeta = "ehdlmeta" + XMeta MetaType = "xmeta" + HathMeta = "hathmeta" + EHDLMeta = "ehdlmeta" + FuzzyMatch = "fuzzy" ) +type NoMatchPaths struct { + libraryPath string + fullPath string +} + // matchInternalMeta reads the internal metadata (info.json, info.txt or galleryinfo.txt) from the given archive. func matchInternalMeta(metaTypes map[MetaType]bool, fullArchivePath string) ([]byte, string, MetaType) { filesystem, err := archiver.FileSystem(nil, fullArchivePath) @@ -77,7 +83,6 @@ func matchExternalMeta(metaTypes map[MetaType]bool, fullArchivePath string, libr externalJSON := constants.ArchiveExtensions.ReplaceAllString(fullArchivePath, ".json") if !utils.PathExists(externalJSON) { - archivesNoMatch = append(archivesNoMatch, NoMatchPaths{libraryPath: libraryPath, fullPath: fullArchivePath}) return nil, "" } @@ -100,86 +105,106 @@ func ParseMetadata(metaTypes map[MetaType]bool) { return } + var archivesWithNoMatch []NoMatchPaths + for _, galleryLibrary := range libraries { for _, gallery := range galleryLibrary.Galleries { fullPath := config.BuildLibraryPath(galleryLibrary.Path, gallery.ArchivePath) var metaData []byte var metaPath string - internal := false + internalDataFound := false // X, Hath, EHDL metaData, metaPath, metaType := matchInternalMeta(metaTypes, fullPath) if metaData != nil { - internal = true + internalDataFound = true } // X - if !internal { + if !internalDataFound { metaData, metaPath = matchExternalMeta(metaTypes, fullPath, galleryLibrary.Path) metaType = XMeta } - if metaData != nil { - var newGallery model.Gallery - var tags []model.Tag - var reference model.Reference - - switch metaType { - case XMeta: - if newGallery, tags, reference, err = ParseX(metaData, metaPath, gallery.ArchivePath, internal); err != nil { - log.Z.Debug("could not parse X meta", - zap.String("path", metaPath), - zap.String("err", err.Error())) - - cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{ - "metaType": string(metaType), - "metaPath": metaPath, - }) - continue - } - case EHDLMeta: - if newGallery, tags, err = ParseEHDL(metaPath); err != nil { - log.Z.Debug("could not parse EHDL meta", - zap.String("path", metaPath), - zap.String("err", err.Error())) - - cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{ - "metaType": string(metaType), - "metaPath": metaPath, - }) - continue - } - case HathMeta: - if newGallery, tags, err = ParseHath(metaPath); err != nil { - log.Z.Debug("could not parse Hath meta", - zap.String("path", metaPath), - zap.String("err", err.Error())) - - cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{ - "metaType": string(metaType), - "metaPath": metaPath, - }) - continue - } + if metaData == nil { + if metaTypes[FuzzyMatch] { + archivesWithNoMatch = append(archivesWithNoMatch, NoMatchPaths{libraryPath: galleryLibrary.Path, fullPath: fullPath}) } + continue + } - if err = db.UpdateGallery(newGallery, tags, reference, true); err != nil { - log.Z.Debug("could not tag gallery", - zap.String("path", gallery.ArchivePath), + var newGallery model.Gallery + var tags []model.Tag + var reference model.Reference + + switch metaType { + case XMeta: + if newGallery, tags, reference, err = ParseX(metaData, metaPath, gallery.ArchivePath, internalDataFound); err != nil { + log.Z.Debug("could not parse X meta", + zap.String("path", metaPath), zap.String("err", err.Error())) - cache.ProcessingStatusCache.AddMetadataError(newGallery.UUID, err.Error(), map[string]string{ - "path": gallery.ArchivePath, + cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{ + "metaType": string(metaType), + "metaPath": metaPath, + }) + continue + } + case EHDLMeta: + if newGallery, tags, reference, err = ParseEHDL(metaPath, metaData, internalDataFound); err != nil { + log.Z.Debug("could not parse EHDL meta", + zap.String("path", metaPath), + zap.String("err", err.Error())) + + cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{ + "metaType": string(metaType), + "metaPath": metaPath, + }) + continue + } + case HathMeta: + if newGallery, tags, reference, err = ParseHath(metaPath, metaData, internalDataFound); err != nil { + log.Z.Debug("could not parse Hath meta", + zap.String("path", metaPath), + zap.String("err", err.Error())) + + cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{ + "metaType": string(metaType), + "metaPath": metaPath, }) continue } } + + // Adds the UUID and archive path to the new gallery. + newGallery.UUID = gallery.UUID + newGallery.ArchivePath = gallery.ArchivePath + + err = db.UpdateGallery(newGallery, tags, reference, true) + if err != nil { + log.Z.Debug("could not tag gallery", + zap.String("path", gallery.ArchivePath), + zap.String("err", err.Error())) + + cache.ProcessingStatusCache.AddMetadataError(newGallery.UUID, err.Error(), map[string]string{ + "path": gallery.ArchivePath, + }) + continue + } + + log.Z.Info("metadata parsed", + zap.String("metaType", string(metaType)), + zap.String("uuid", gallery.UUID), + zap.String("title", gallery.Title), + zap.String("path", gallery.ArchivePath), + zap.String("metaPath", metaPath), + ) } } // Fuzzy parsing for all archives that didn't have an exact match. - for _, noMatch := range archivesNoMatch { + for _, noMatch := range archivesWithNoMatch { onlyDir := filepath.Dir(noMatch.fullPath) files, err := os.ReadDir(onlyDir) if err != nil { @@ -191,7 +216,7 @@ func ParseMetadata(metaTypes map[MetaType]bool) { for _, f := range files { r, exhGallery := fuzzyMatchExternalMeta(noMatch.fullPath, noMatch.libraryPath, f) - if r.MatchedArchivePath != "" && r.MetaTitleMatch || r.Similarity > 0.70 { + if r.MatchedArchivePath != "" && r.MetaTitleMatch || r.Similarity > config.Options.GalleryOptions.FuzzySearchSimilarity { gallery, tags, reference := convertExh(exhGallery, r.MatchedArchivePath, r.RelativeMetaPath, false) if !r.MetaTitleMatch { diff --git a/pkg/metadata/title.go b/pkg/metadata/title.go index 70dda19..1fb7a5e 100644 --- a/pkg/metadata/title.go +++ b/pkg/metadata/title.go @@ -3,7 +3,6 @@ package metadata import ( "path" "path/filepath" - "reflect" "regexp" "strconv" "strings" @@ -40,13 +39,22 @@ func ParseTitles(tryNative bool, overwrite bool) { for _, library := range libraries { for _, gallery := range library.Galleries { + if db.TitleHashMatch(gallery.UUID) { + continue + } + _, currentTags, err := db.GetTags(gallery.UUID, false) - currentReference, err := db.GetReference(gallery.UUID) if err != nil { log.Z.Error("tags could not be retrieved when parsing titles", zap.String("err", err.Error())) continue } + currentReference, err := db.GetReference(gallery.UUID) + if err != nil { + log.Z.Error("reference could not be retrieved when parsing titles", zap.String("err", err.Error())) + continue + } + hasTitleTranslated := gallery.TitleTranslated != nil hasRelease := gallery.Released != nil hasSeries := gallery.Series != nil @@ -67,15 +75,15 @@ func ParseTitles(tryNative bool, overwrite bool) { titleMeta := ParseTitle(title) - if tryNative && reflect.ValueOf(titleMeta).IsZero() { + if tryNative && titleMeta == nil { titleMeta = ParseTitle(*titleNative) } - if reflect.ValueOf(titleMeta).IsZero() { + if titleMeta == nil { titleMeta = ParseTitle(filename) } - if !reflect.ValueOf(titleMeta).IsZero() { + if titleMeta != nil { if titleMeta.Title != "" && (!hasTitleTranslated || overwrite) { if gallery.Translated != nil && *gallery.Translated { gallery.TitleTranslated = &titleMeta.Title @@ -139,19 +147,21 @@ func ParseTitles(tryNative bool, overwrite bool) { gallery.Category = &manga } - err = db.UpdateGallery(gallery, currentTags, currentReference, true) - if err != nil { + if err = db.UpdateGallery(gallery, currentTags, currentReference, true); err != nil { log.Z.Error("failed to update gallery based on its title", zap.String("gallery", gallery.UUID), zap.String("err", err.Error())) } + log.Z.Info("metadata parsed based on title", + zap.String("uuid", gallery.UUID), + zap.String("title", gallery.Title)) } } } // ParseTitle parses the filename or title following the standard: // (Release) [Circle (Artist)] Title (Series) [ Language] or (Release) [Artist] Title (Series) [ Language] -func ParseTitle(title string) TitleMeta { +func ParseTitle(title string) *TitleMeta { match := nameRegex.FindStringSubmatch(title) var artists []string if match[3] != "" { @@ -164,7 +174,7 @@ func ParseTitle(title string) TitleMeta { } } - return TitleMeta{ + titleMeta := TitleMeta{ Released: strings.TrimSpace(match[1]), Circle: strings.TrimSpace(match[2]), Artists: strings.Split(strings.TrimSpace(match[3]), ", "), @@ -172,6 +182,12 @@ func ParseTitle(title string) TitleMeta { Series: strings.TrimSpace(match[5]), Language: strings.TrimSpace(match[6]), } + + if titleMeta.Released == "" && titleMeta.Circle == "" && len(titleMeta.Artists) == 0 && titleMeta.Title == "" && titleMeta.Series == "" && titleMeta.Language == "" { + return nil + } + + return &titleMeta } func containsTag(tags []model.Tag, namespace string, name *string) bool { diff --git a/pkg/metadata/x.go b/pkg/metadata/x.go index 4f2b145..a893792 100644 --- a/pkg/metadata/x.go +++ b/pkg/metadata/x.go @@ -58,13 +58,6 @@ type XMetadata struct { var metaExtensions = regexp.MustCompile(`\.json$`) -type NoMatchPaths struct { - libraryPath string - fullPath string -} - -var archivesNoMatch []NoMatchPaths - // unmarshalExhJSON parses ExH JSON bytes into XMetadata. func unmarshalExhJSON(byteValue []byte) (XMetadata, error) { var gallery XMetadata