Skip to content

Commit

Permalink
feat: greatly improve gallery and metadata parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
CrescentKohana committed Mar 22, 2024
1 parent b5920b0 commit d6987a8
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 107 deletions.
2 changes: 2 additions & 0 deletions pkg/api/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,13 @@ func findMetadata(w http.ResponseWriter, r *http.Request) {
x := r.URL.Query().Get("x")
ehdl := r.URL.Query().Get("ehdl")
hath := r.URL.Query().Get("hath")
fuzzy := r.URL.Query().Get("fuzzy")

metaTypes := make(map[metadata.MetaType]bool)
metaTypes[metadata.XMeta] = x == "true"
metaTypes[metadata.EHDLMeta] = ehdl == "true"
metaTypes[metadata.HathMeta] = hath == "true"
metaTypes[metadata.FuzzyMatch] = fuzzy == "true"
go metadata.ParseMetadata(metaTypes)

if title == "true" {
Expand Down
4 changes: 2 additions & 2 deletions pkg/db/user.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import (
"database/sql"
"encoding/base64"
"github.com/Mangatsu/server/pkg/log"
"github.com/Mangatsu/server/pkg/types/model"
. "github.com/Mangatsu/server/pkg/types/table"
"github.com/Mangatsu/server/pkg/types/sqlite/model"
. "github.com/Mangatsu/server/pkg/types/sqlite/table"
"github.com/Mangatsu/server/pkg/utils"
. "github.com/go-jet/jet/v2/sqlite"
"github.com/google/uuid"
Expand Down
9 changes: 9 additions & 0 deletions pkg/library/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ import (

func countImages(archivePath string) (uint64, error) {
filesystem, err := archiver.FileSystem(nil, archivePath)
if err != nil {
log.Z.Error("could not open archive",
zap.String("path", archivePath),
zap.String("err", err.Error()),
)

return 0, err
}

var fileCount uint64

err = fs.WalkDir(filesystem, ".", func(s string, d fs.DirEntry, err error) error {
Expand Down
27 changes: 10 additions & 17 deletions pkg/metadata/ehdl.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package metadata

import (
"bufio"
"os"
"bytes"
"regexp"
"strconv"
"strings"
Expand All @@ -17,24 +17,17 @@ var sizeRegex = regexp.MustCompile(`File Size:\s*(\d+(?:\.\d+)?)`)

// ParseEHDL parses the given text metadata.
// The input is expected to be in the E-Hentai-Downloader format (info.txt).
func ParseEHDL(filePath string) (model.Gallery, []model.Tag, error) {
file, err := os.Open(filePath)
func ParseEHDL(metaPath string, metaData []byte, internal bool) (model.Gallery, []model.Tag, model.Reference, error) {
gallery := model.Gallery{}
reference := model.Reference{}
var tags []model.Tag

if err != nil {
return gallery, nil, err
reference := model.Reference{
MetaPath: &metaPath,
MetaInternal: internal,
Urls: nil,
}
var tags []model.Tag

defer func(file *os.File) {
err := file.Close()
if err != nil {
log.Z.Debug("failed to close EHDL formatted file", zap.String("err", err.Error()))
}
}(file)

scanner := bufio.NewScanner(file)
buffer := bytes.NewBuffer(metaData)
scanner := bufio.NewScanner(buffer)
lineNumber := -1

for scanner.Scan() {
Expand Down Expand Up @@ -126,5 +119,5 @@ func ParseEHDL(filePath string) (model.Gallery, []model.Tag, error) {
}
}

return gallery, tags, nil
return gallery, tags, reference, nil
}
26 changes: 10 additions & 16 deletions pkg/metadata/hath.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,24 @@ package metadata

import (
"bufio"
"os"
"bytes"
"strings"

"github.com/Mangatsu/server/pkg/types/sqlite/model"
)

// ParseHath parses the given text file. The input is expected to be in the H@H (Hath) format (galleryinfo.txt).
func ParseHath(filePath string) (model.Gallery, []model.Tag, error) {
file, err := os.Open(filePath)
func ParseHath(metaPath string, metaData []byte, internal bool) (model.Gallery, []model.Tag, model.Reference, error) {
gallery := model.Gallery{}
var tags []model.Tag

if err != nil {
return gallery, nil, err
reference := model.Reference{
MetaPath: &metaPath,
MetaInternal: internal,
Urls: nil,
}
var tags []model.Tag

defer func(file *os.File) {
err := file.Close()
if err != nil {
log.Z.Debug("failed to close Hath formatted file", zap.String("err", err.Error()))
}
}(file)

scanner := bufio.NewScanner(file)
buffer := bytes.NewBuffer(metaData)
scanner := bufio.NewScanner(buffer)

for scanner.Scan() {
line := scanner.Text()
Expand Down Expand Up @@ -59,5 +53,5 @@ func ParseHath(filePath string) (model.Gallery, []model.Tag, error) {
}
}

return gallery, tags, nil
return gallery, tags, reference, nil
}
137 changes: 81 additions & 56 deletions pkg/metadata/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,17 @@ import (
// MetaType names a metadata source format, or a scan option, that can be
// switched on or off for a metadata scan. Values are used as keys of the
// map[MetaType]bool passed to ParseMetadata.
type MetaType string

// Each constant is declared exactly once and carries the MetaType type
// explicitly. A constant written as `Name = "value"` inside a const group is
// an untyped string constant, not a MetaType, so the type is repeated on
// every line instead of relying on the first entry.
const (
	// XMeta selects the X metadata format (parsed by ParseX).
	XMeta MetaType = "xmeta"
	// HathMeta selects the H@H (Hath) format (galleryinfo.txt).
	HathMeta MetaType = "hathmeta"
	// EHDLMeta selects the E-Hentai-Downloader format (info.txt).
	EHDLMeta MetaType = "ehdlmeta"
	// FuzzyMatch enables fuzzy matching for archives that had no exact
	// metadata match.
	FuzzyMatch MetaType = "fuzzy"
)

// NoMatchPaths records an archive for which no exact metadata match was
// found, so that it can be retried later with fuzzy matching.
//
// libraryPath is the path of the library the archive belongs to, and
// fullPath is the full path to the archive itself.
type NoMatchPaths struct {
	libraryPath, fullPath string
}

// matchInternalMeta reads the internal metadata (info.json, info.txt or galleryinfo.txt) from the given archive.
func matchInternalMeta(metaTypes map[MetaType]bool, fullArchivePath string) ([]byte, string, MetaType) {
filesystem, err := archiver.FileSystem(nil, fullArchivePath)
Expand Down Expand Up @@ -77,7 +83,6 @@ func matchExternalMeta(metaTypes map[MetaType]bool, fullArchivePath string, libr
externalJSON := constants.ArchiveExtensions.ReplaceAllString(fullArchivePath, ".json")

if !utils.PathExists(externalJSON) {
archivesNoMatch = append(archivesNoMatch, NoMatchPaths{libraryPath: libraryPath, fullPath: fullArchivePath})
return nil, ""
}

Expand All @@ -100,86 +105,106 @@ func ParseMetadata(metaTypes map[MetaType]bool) {
return
}

var archivesWithNoMatch []NoMatchPaths

for _, galleryLibrary := range libraries {
for _, gallery := range galleryLibrary.Galleries {
fullPath := config.BuildLibraryPath(galleryLibrary.Path, gallery.ArchivePath)

var metaData []byte
var metaPath string
internal := false
internalDataFound := false

// X, Hath, EHDL
metaData, metaPath, metaType := matchInternalMeta(metaTypes, fullPath)
if metaData != nil {
internal = true
internalDataFound = true
}

// X
if !internal {
if !internalDataFound {
metaData, metaPath = matchExternalMeta(metaTypes, fullPath, galleryLibrary.Path)
metaType = XMeta
}

if metaData != nil {
var newGallery model.Gallery
var tags []model.Tag
var reference model.Reference

switch metaType {
case XMeta:
if newGallery, tags, reference, err = ParseX(metaData, metaPath, gallery.ArchivePath, internal); err != nil {
log.Z.Debug("could not parse X meta",
zap.String("path", metaPath),
zap.String("err", err.Error()))

cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{
"metaType": string(metaType),
"metaPath": metaPath,
})
continue
}
case EHDLMeta:
if newGallery, tags, err = ParseEHDL(metaPath); err != nil {
log.Z.Debug("could not parse EHDL meta",
zap.String("path", metaPath),
zap.String("err", err.Error()))

cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{
"metaType": string(metaType),
"metaPath": metaPath,
})
continue
}
case HathMeta:
if newGallery, tags, err = ParseHath(metaPath); err != nil {
log.Z.Debug("could not parse Hath meta",
zap.String("path", metaPath),
zap.String("err", err.Error()))

cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{
"metaType": string(metaType),
"metaPath": metaPath,
})
continue
}
if metaData == nil {
if metaTypes[FuzzyMatch] {
archivesWithNoMatch = append(archivesWithNoMatch, NoMatchPaths{libraryPath: galleryLibrary.Path, fullPath: fullPath})
}
continue
}

if err = db.UpdateGallery(newGallery, tags, reference, true); err != nil {
log.Z.Debug("could not tag gallery",
zap.String("path", gallery.ArchivePath),
var newGallery model.Gallery
var tags []model.Tag
var reference model.Reference

switch metaType {
case XMeta:
if newGallery, tags, reference, err = ParseX(metaData, metaPath, gallery.ArchivePath, internalDataFound); err != nil {
log.Z.Debug("could not parse X meta",
zap.String("path", metaPath),
zap.String("err", err.Error()))

cache.ProcessingStatusCache.AddMetadataError(newGallery.UUID, err.Error(), map[string]string{
"path": gallery.ArchivePath,
cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{
"metaType": string(metaType),
"metaPath": metaPath,
})
continue
}
case EHDLMeta:
if newGallery, tags, reference, err = ParseEHDL(metaPath, metaData, internalDataFound); err != nil {
log.Z.Debug("could not parse EHDL meta",
zap.String("path", metaPath),
zap.String("err", err.Error()))

cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{
"metaType": string(metaType),
"metaPath": metaPath,
})
continue
}
case HathMeta:
if newGallery, tags, reference, err = ParseHath(metaPath, metaData, internalDataFound); err != nil {
log.Z.Debug("could not parse Hath meta",
zap.String("path", metaPath),
zap.String("err", err.Error()))

cache.ProcessingStatusCache.AddMetadataError(gallery.UUID, err.Error(), map[string]string{
"metaType": string(metaType),
"metaPath": metaPath,
})
continue
}
}

// Adds the UUID and archive path to the new gallery.
newGallery.UUID = gallery.UUID
newGallery.ArchivePath = gallery.ArchivePath

err = db.UpdateGallery(newGallery, tags, reference, true)
if err != nil {
log.Z.Debug("could not tag gallery",
zap.String("path", gallery.ArchivePath),
zap.String("err", err.Error()))

cache.ProcessingStatusCache.AddMetadataError(newGallery.UUID, err.Error(), map[string]string{
"path": gallery.ArchivePath,
})
continue
}

log.Z.Info("metadata parsed",
zap.String("metaType", string(metaType)),
zap.String("uuid", gallery.UUID),
zap.String("title", gallery.Title),
zap.String("path", gallery.ArchivePath),
zap.String("metaPath", metaPath),
)
}
}

// Fuzzy parsing for all archives that didn't have an exact match.
for _, noMatch := range archivesNoMatch {
for _, noMatch := range archivesWithNoMatch {
onlyDir := filepath.Dir(noMatch.fullPath)
files, err := os.ReadDir(onlyDir)
if err != nil {
Expand All @@ -191,7 +216,7 @@ func ParseMetadata(metaTypes map[MetaType]bool) {
for _, f := range files {
r, exhGallery := fuzzyMatchExternalMeta(noMatch.fullPath, noMatch.libraryPath, f)

if r.MatchedArchivePath != "" && r.MetaTitleMatch || r.Similarity > 0.70 {
if r.MatchedArchivePath != "" && r.MetaTitleMatch || r.Similarity > config.Options.GalleryOptions.FuzzySearchSimilarity {
gallery, tags, reference := convertExh(exhGallery, r.MatchedArchivePath, r.RelativeMetaPath, false)

if !r.MetaTitleMatch {
Expand Down
Loading

0 comments on commit d6987a8

Please sign in to comment.