Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PICARD-3000: Children's Music is shown as "Children'S Music" in Picard #2548

Merged
merged 6 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 2 additions & 28 deletions picard/script/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
from functools import reduce
import operator
import re
import unicodedata

from picard.const.countries import RELEASE_COUNTRIES
from picard.extension_points.script_functions import script_function
Expand All @@ -57,6 +56,7 @@
)
from picard.util import (
pattern_as_regex,
titlecase,
uniqify,
)

Expand Down Expand Up @@ -962,33 +962,7 @@ def func_ne_any(parser, x, *args):
_Since Picard 2.1_"""
))
def func_title(parser, text):
# GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
# from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
if not text:
return text
capitalized = text[0].capitalize()
capital = False
for i in range(1, len(text)):
t = text[i]
if t in "’'" and text[i-1].isalpha():
capital = False
elif iswbound(t):
capital = True
elif capital and t.isalpha():
capital = False
t = t.capitalize()
else:
capital = False
capitalized += t
return capitalized


def iswbound(char):
# GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
# from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
""" Checks whether the given character is a word boundary """
category = unicodedata.category(char)
return 'Zs' == category or 'Sk' == category or 'P' == category[0]
return titlecase(text)


@script_function(documentation=N_(
Expand Down
30 changes: 28 additions & 2 deletions picard/track.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,10 @@
ScriptParser,
iter_active_tagging_scripts,
)
from picard.util import pattern_as_regex
from picard.util import (
pattern_as_regex,
titlecase,
)
from picard.util.imagelist import ImageList
from picard.util.textencoding import asciipunct

Expand Down Expand Up @@ -321,6 +324,29 @@ def _customize_metadata(self):

@staticmethod
def _genres_to_metadata(genres, limit=None, minusage=0, filters='', join_with=None):
def titlize(text):
    """Convert text to title case, keeping contractions intact.

    Every run of letters has its first character upper-cased and the
    remaining characters lower-cased. A single apostrophe (ASCII ``'`` or
    typographic ``’``) followed by more letters is treated as part of the
    same word, so the letter after the apostrophe is not capitalized.

    Letters are matched Unicode-aware (any word character except decimal
    digits and the underscore), so accented words such as "über" are
    handled as a whole instead of being split at the non-ASCII character.

    Args:
        text (str): The input string to be converted to title case.

    Returns:
        str: The input text converted to title case.

    Examples:
        >>> titlize("children's music")
        "Children's Music"
        >>> titlize("blues")
        'Blues'
        >>> titlize("über café")
        'Über Café'
    """
    def _capitalize_word(match):
        # Upper-case the first character, lower-case everything after it.
        word = match.group(0)
        return word[0].upper() + word[1:].lower()

    # [^\W\d_] is "a word character that is neither a digit nor underscore",
    # i.e. a letter in any script rather than only A-Z / a-z.
    return re.sub(r"[^\W\d_]+(?:['’][^\W\d_]+)?", _capitalize_word, text)

zytact marked this conversation as resolved.
Show resolved Hide resolved
if limit is not None and limit < 1:
return []

Expand All @@ -335,7 +361,7 @@ def _genres_to_metadata(genres, limit=None, minusage=0, filters='', join_with=No

# Find most common genres
most_common_genres = genres.most_common(limit)
genres_list = [name.title() for name, _count in most_common_genres]
genres_list = [titlecase(name) for name, _count in most_common_genres]
genres_list.sort()

# And generate the genre metadata tag
Expand Down
48 changes: 48 additions & 0 deletions picard/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1183,3 +1183,51 @@ def detect_file_encoding(path, max_bytes_to_read=1024*256):
encoding = result['encoding'].lower()

return encoding


def iswbound(char):
    """Tell whether the given character counts as a word boundary.

    A character is a boundary when its Unicode general category is ``Zs``
    (space separator), ``Sk`` (modifier symbol) or any of the punctuation
    categories (those whose name starts with ``P``).
    """
    # GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
    # from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
    general_category = unicodedata.category(char)
    if general_category in ('Zs', 'Sk'):
        return True
    return general_category[0] == 'P'


def titlecase(text):
    # GPL 2.0 licensed code by Javier Kohen, Sambhav Kothari
    # from https://github.com/metabrainz/picard-plugins/blob/2.0/plugins/titlecase/titlecase.py
    """Convert text to title case following word boundary rules.

    The first character of the text and the first letter directly following
    a word boundary (as decided by ``iswbound``) are capitalized. All other
    characters are left untouched, so existing capitalization inside a word
    is preserved. An apostrophe (``'`` or ``’``) that directly follows a
    letter does not start a new word, which keeps contractions and
    possessives such as "children's" intact.

    Args:
        text (str): The input text to convert to title case.

    Returns:
        str: The text converted to title case. A falsy input (such as the
        empty string) is returned unchanged.

    Examples:
        >>> titlecase("hello world")
        'Hello World'
        >>> titlecase("children's music")
        "Children's Music"
    """
    if not text:
        return text
    # Collect the pieces in a list and join once instead of growing a string.
    pieces = [text[0].capitalize()]
    capitalize_next = False
    # Walk over (previous, current) character pairs of the original text.
    for previous, char in zip(text, text[1:]):
        if char in "’'" and previous.isalpha():
            # Apostrophe inside a word: the following letter stays as it is.
            capitalize_next = False
        elif iswbound(char):
            capitalize_next = True
        elif capitalize_next and char.isalpha():
            capitalize_next = False
            char = char.capitalize()
        else:
            # Non-letter, non-boundary characters (e.g. digits) cancel the
            # pending capitalization.
            capitalize_next = False
        pieces.append(char)
    return "".join(pieces)
49 changes: 49 additions & 0 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
pattern_as_regex,
sort_by_similarity,
system_supports_long_paths,
titlecase,
tracknum_and_title_from_filename,
tracknum_from_filename,
uniqify,
Expand Down Expand Up @@ -1019,3 +1020,51 @@ def test_detect_file_encoding_eac_windows_1251(self):
expected_encoding = 'windows-1251'
file_path = get_test_data_path('eac-windows1251.log')
self.assertEqual(expected_encoding, detect_file_encoding(file_path))


class TitlecaseTest(PicardTestCase):
    """Table-driven checks for the ``titlecase`` utility function."""

    def test_titlecase(self):
        """Check ``titlecase`` against a table of (input, expected) pairs."""
        tests = (
            # empty string
            ('', ''),
            # simple cases
            ('hello world', 'Hello World'),
            ('Hello World', 'Hello World'),
            ('HELLO WORLD', 'HELLO WORLD'),
            # contractions and possessives
            ("children's music", "Children's Music"),
            ("CHILDREN'S MUSIC", "CHILDREN'S MUSIC"),
            ("don't stop", "Don't Stop"),
            # hyphenated words
            ('first-class ticket', 'First-Class Ticket'),
            ('FIRST-CLASS ticket', 'FIRST-CLASS Ticket'),
            # multiple spaces
            ('hello   world', 'Hello   World'),
            # punctuation
            ('hello, world!', 'Hello, World!'),
            ('hello... world', 'Hello... World'),
            # special characters
            ('über café', 'Über Café'),
            ('españa', 'España'),
            ('ñandu', 'Ñandu'),
            # single character words
            ('a b c', 'A B C'),
            # numbers
            ('2001 a space odyssey', '2001 A Space Odyssey'),
            # preserves existing capitalization after first letter
            ('MacDonald had a farm', 'MacDonald Had A Farm'),
            ('LaTeX document', 'LaTeX Document'),
            # mixed case
            ('mIxEd CaSe', 'MIxEd CaSe'),
            # unicode boundaries
            ('hello—world', 'Hello—World'),
            ('hello\u2014world', 'Hello\u2014World'),
            # preserves all caps
            ('IBM PC', 'IBM PC'),
            # single letter
            ('a', 'A'),
            ('A', 'A'),
        )
        # NOTE(review): assumes PicardTestCase derives from unittest.TestCase,
        # which provides subTest - confirm.
        for text, expected in tests:
            # subTest reports every failing row instead of stopping at the first.
            with self.subTest(text=text):
                self.assertEqual(expected, titlecase(text))
Loading