diff --git a/picard/util/__init__.py b/picard/util/__init__.py index 6af22fda20..214f8ffef7 100644 --- a/picard/util/__init__.py +++ b/picard/util/__init__.py @@ -1167,10 +1167,11 @@ def strxfrm(string): ENCODING_BOMS = { + b'\xff\xfe\x00\x00': 'utf-32-le', + b'\x00\x00\xfe\xff': 'utf-32-be', + b'\xef\xbb\xbf': 'utf-8-sig', b'\xff\xfe': 'utf-16-le', b'\xfe\xff': 'utf-16-be', - b'\00\00\xff\xfe': 'utf-32-le', - b'\00\00\xfe\xff': 'utf-32-be', } diff --git a/test/data/eac-utf32le.log b/test/data/eac-utf32le.log new file mode 100644 index 0000000000..12df8b62a6 Binary files /dev/null and b/test/data/eac-utf32le.log differ diff --git a/test/test_utils.py b/test/test_utils.py index f3c997b28c..e95d66cf62 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -60,6 +60,7 @@ album_artist_from_path, any_exception_isinstance, build_qurl, + detect as charset_detect, detect_file_encoding, encoded_queryargs, extract_year_from_date, @@ -949,8 +950,8 @@ def test_detect_file_encoding_bom(self): boms = { b'\xff\xfe': 'utf-16-le', b'\xfe\xff': 'utf-16-be', - b'\00\00\xff\xfe': 'utf-32-le', - b'\00\00\xfe\xff': 'utf-32-be', + b'\xff\xfe\x00\x00': 'utf-32-le', + b'\x00\x00\xfe\xff': 'utf-32-be', b'\xef\xbb\xbf': 'utf-8-sig', b'': 'utf-8', b'\00': 'utf-8', @@ -960,7 +961,9 @@ def test_detect_file_encoding_bom(self): f = NamedTemporaryFile(delete=False) f.write(bom) f.close() - self.assertEqual(expected_encoding, detect_file_encoding(f.name)) + encoding = detect_file_encoding(f.name) + self.assertEqual(expected_encoding, encoding, + f'BOM {bom!r} detected as {encoding}, expected {expected_encoding}') finally: f.close() os.remove(f.name) @@ -970,6 +973,12 @@ def test_detect_file_encoding_eac_utf_16_le(self): file_path = get_test_data_path('eac-utf16le.log') self.assertEqual(expected_encoding, detect_file_encoding(file_path)) + def test_detect_file_encoding_eac_utf_32_le(self): + expected_encoding = 'utf-32-le' + file_path = get_test_data_path('eac-utf32le.log') + self.assertEqual(expected_encoding, detect_file_encoding(file_path)) + + @unittest.skipUnless(charset_detect, "test requires charset-normalizer or chardet package") def test_detect_file_encoding_eac_windows_1251(self): expected_encoding = 'windows-1251' file_path = get_test_data_path('eac-windows1251.log')