From bae9b566e5eb8cb6507cb86509d677b59aebc128 Mon Sep 17 00:00:00 2001 From: Marek Skrobacki Date: Sun, 3 Nov 2024 21:43:17 +0000 Subject: [PATCH 1/3] Improve accuracy of matching album to dirname This code resolves a problem with some of the albums not being matched by other heuristics in the get_album_dir() method by taking a slightly different approach. It emulates what humans do - mentally breaking down the name into logical chunks that represent an artist name and album name, and ignoring everything else. The testing suite has been extended with a few examples of albums exhibiting problems with the way matching was done prior to this commit. --- .../providers/filesystem_local/helpers.py | 56 +++++++++++++++++++ tests/providers/filesystem/test_helpers.py | 31 ++++++++-- 2 files changed, 83 insertions(+), 4 deletions(-) diff --git a/music_assistant/providers/filesystem_local/helpers.py b/music_assistant/providers/filesystem_local/helpers.py index 57e87b93d..7e15e550d 100644 --- a/music_assistant/providers/filesystem_local/helpers.py +++ b/music_assistant/providers/filesystem_local/helpers.py @@ -67,6 +67,48 @@ def get_artist_dir( return matched_dir +def tokenize(input_str: str, delimiters: str) -> list[str]: + """Tokenizes the album names or paths.""" + normalised = re.sub(delimiters, "^^^", input_str) + return [x for x in normalised.split("^^^") if x != ""] + + +def _dir_contains_album_name(id3_album_name: str, directory_name: str) -> bool: + """Check if a directory name contains an album name. + + This function tokenizes both input strings using different delimiters and + checks if the album name is a substring of the directory name. + + First iteration considers the literal dash as one of the separators. The + second pass is to catch edge cases where the literal dash is part of the + album's name, not an actual separator. For example, an album like 'Aphex + Twin - Selected Ambient Works 85-92' would be correctly handled. 
+ + Args: + id3_album_name (str): The album name to search for. + directory_name (str): The directory name to search in. + + Returns: + bool: True if the directory name contains the album name, False otherwise. + """ + for delims in ["[-_ ]", "[_ ]"]: + tokenized_album_name = tokenize(id3_album_name, delims) + tokenized_dirname = tokenize(directory_name, delims) + + # Exact match, potentially just on the album name + # in case artist's name is not included in id3_album_name + if all(token in tokenized_dirname for token in tokenized_album_name): + return True + + if len(tokenized_album_name) <= len(tokenized_dirname) and compare_strings( + "".join(tokenized_album_name), + "".join(tokenized_dirname[0 : len(tokenized_album_name)]), + False, + ): + return True + return False + + def get_album_dir(track_dir: str, album_name: str) -> str | None: """Return album/parent directory of a track.""" parentdir = track_dir @@ -82,6 +124,20 @@ def get_album_dir(track_dir: str, album_name: str) -> str | None: if compare_strings(album_name, dirname.split(" - ")[-1].split("(")[0], False): # account for ArtistName - AlbumName (Version) format in the directory name return parentdir + + if any(sep in dirname for sep in ["-", " ", "_"]) and album_name: + album_chunks = album_name.split(" - ", 1) + album_name_includes_artist = len(album_chunks) > 1 + just_album_name = album_chunks[1] if album_name_includes_artist else None + + # attempt matching using tokenized version of path and album name + # with _dir_contains_album_name() + if just_album_name and _dir_contains_album_name(just_album_name, dirname): + return parentdir + + if _dir_contains_album_name(album_name, dirname): + return parentdir + if compare_strings(album_name.split("(")[0], dirname, False): # account for AlbumName (Version) format in the album name return parentdir diff --git a/tests/providers/filesystem/test_helpers.py b/tests/providers/filesystem/test_helpers.py index 591a36ec5..5145d9d91 100644 --- 
a/tests/providers/filesystem/test_helpers.py +++ b/tests/providers/filesystem/test_helpers.py @@ -62,17 +62,40 @@ def test_get_artist_dir() -> None: "/home/user/Music/Aphex Twin - Selected Ambient Works 85-92 (Remastered) - WEB", "/home/user/Music/Aphex Twin - Selected Ambient Works 85-92 (Remastered) - WEB", ), + # Test tokenizer - dirname with extras + ( + "Fokus - Prewersje", + "/home/user/Fokus-Prewersje-PL-WEB-FLAC-2021-PS_INT", + "/home/user/Fokus-Prewersje-PL-WEB-FLAC-2021-PS_INT", + ), + # Test tokenizer - dirname with version and extras + ( + "Layo And Bushwacka - Night Works", + "/home/music/Layo_And_Bushwacka-Night_Works_(Reissue)-(XLCD_154X)-FLAC-2003", + "/home/music/Layo_And_Bushwacka-Night_Works_(Reissue)-(XLCD_154X)-FLAC-2003", + ), + # Test tokenizer - extras and approximate match on diacritics + ( + "Łona i Webber - Wyślij Sobie Pocztówkę", + "/usr/others/Lona-Discography-PL-FLAC-2020-INT/Lona_I_Webber-Wyslij_Sobie_Pocztowke-PL-WEB-FLAC-2014-PS", + "/usr/others/Lona-Discography-PL-FLAC-2020-INT/Lona_I_Webber-Wyslij_Sobie_Pocztowke-PL-WEB-FLAC-2014-PS", + ), + ( + "NIC", + "/nas/downloads/others/Sokol-NIC-PL-WEB-FLAC-2021", + "/nas/downloads/others/Sokol-NIC-PL-WEB-FLAC-2021", + ), # Test album (version) format ( - "Selected Ambient Works 85-92", + "Aphex Twin - Selected Ambient Works 85-92", "/home/user/Music/Aphex Twin/Selected Ambient Works 85-92 (Remastered)", "/home/user/Music/Aphex Twin/Selected Ambient Works 85-92 (Remastered)", ), # Test album name in dir ( - "Selected Ambient Works 85-92", - "/home/user/Music/RandomDirWithSelected Ambient Works 85-92InIt", - "/home/user/Music/RandomDirWithSelected Ambient Works 85-92InIt", + "Aphex Twin - Selected Ambient Works 85-92", + "/home/user/Music/RandomDirWithAphex Twin - Selected Ambient Works 85-92InIt", + "/home/user/Music/RandomDirWithAphex Twin - Selected Ambient Works 85-92InIt", ), # Test no match ( From 0bf461ce1c1eebc39f5574fe9b9257486813d223 Mon Sep 17 00:00:00 2001 From: Marek Skrobacki 
Date: Sun, 17 Nov 2024 11:37:17 +0000 Subject: [PATCH 2/3] Fix: unsupported operand type(s) for 'in': 'str' and 'EnumType' Fixes: 2024-11-17 10:48:20.315 ERROR (MainThread) [music_assistant] Error doing task: Task exception was never retrieved Traceback (most recent call last): File "/home/skrobul/devel/music-assistant-server/music_assistant/providers/filesystem_local/__init__.py", line 1014, in _parse_album if images := await self._get_local_images(folder_path): ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/skrobul/devel/music-assistant-server/music_assistant/providers/filesystem_local/__init__.py", line 1032, in _get_local_images if item.name in ImageType: ^^^^^^^^^^^^^^^^^^^^^^ File "/run/current-system/sw/lib/python3.11/enum.py", line 742, in __contains__ raise TypeError( TypeError: unsupported operand type(s) for 'in': 'str' and 'EnumType' --- music_assistant/providers/filesystem_local/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/music_assistant/providers/filesystem_local/__init__.py b/music_assistant/providers/filesystem_local/__init__.py index 317d8aa62..8efde6b53 100644 --- a/music_assistant/providers/filesystem_local/__init__.py +++ b/music_assistant/providers/filesystem_local/__init__.py @@ -1028,7 +1028,7 @@ async def _get_local_images(self, folder: str) -> UniqueList[MediaItemImage]: if item.ext != ext: continue # try match on filename = one of our imagetypes - if item.name in ImageType: + if item.name in ImageType.__members__: images.append( MediaItemImage( type=ImageType(item.name), From 55b3c78ec27f03b1a52416e120c0f151bf0cc1a9 Mon Sep 17 00:00:00 2001 From: Marcel van der Veldt Date: Wed, 20 Nov 2024 18:47:51 +0100 Subject: [PATCH 3/3] Update music_assistant/providers/filesystem_local/__init__.py --- music_assistant/providers/filesystem_local/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/music_assistant/providers/filesystem_local/__init__.py 
b/music_assistant/providers/filesystem_local/__init__.py index 8efde6b53..317d8aa62 100644 --- a/music_assistant/providers/filesystem_local/__init__.py +++ b/music_assistant/providers/filesystem_local/__init__.py @@ -1028,7 +1028,7 @@ async def _get_local_images(self, folder: str) -> UniqueList[MediaItemImage]: if item.ext != ext: continue # try match on filename = one of our imagetypes - if item.name in ImageType.__members__: + if item.name in ImageType: images.append( MediaItemImage( type=ImageType(item.name),