-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding loader for CIPI dataset (#599)
* scripts/make, index and track and dataset class. TODO tests * fix docstring * modify the docs * download disclaimer * black * first test * fix metadata * remove embeddings * add more tests * black * modify tests * modify fix.py for adding music21 (optional) * fix bugt with load_scores * fix bugs * from smart_open import open * from smart_open import open * same error than francesco * test fulldataset * test fulldataset * test fulldataset * genis suggestion * replace os.path.exists by try catch * fix plobles with try catch * add cipi to CUSTOM_TEST_TRACKS * modify all the tests * black * smart open test * black * check embeddings * check embeddings * check embeddings * imrpoving codecov * rollback haydn_op20.py * rollback haydn_op20.py * comentario de los embeddings * cante100 -> cipi * baclk * expressiveness * fix make * Done! * Update cipi.py * difficulty annotation * fix docs table * add dataset details and fix error message * now doing the fixes right :) * address problem in table.rst --------- Co-authored-by: PRamoneda <[email protected]> Co-authored-by: Genís Plaja-Roglans <[email protected]> Co-authored-by: Guillem Cortès <[email protected]> Co-authored-by: genisplaja <[email protected]>
- Loading branch information
1 parent
afbc0c3
commit c1dccb0
Showing
17 changed files
with
126,342 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,231 @@ | ||
"""Can I play it? (CIPI) Dataset Loader | ||
.. admonition:: Dataset Info | ||
:class: dropdown | ||
The "Can I Play It?" (CIPI) dataset is a specialized collection of 652 classical piano scores, provided in a | ||
machine-readable MusicXML format and accompanied by integer-based difficulty levels ranging from 1 to 9, as | ||
verified by expert pianists. Then, it provides embeddings for fingering and expresiveness of the piece. Each | ||
recording has multiple scores corresponding to it. This dataset focuses exclusively on classical piano music, | ||
offering a rich resource for music researchers, educators, and students. Developed by the Music Technology Group | ||
in Barcelona, by P. Ramoneda et al. | ||
The CIPI dataset facilitates various applications such as the study of musical complexity, the selection of | ||
appropriately leveled pieces for students, and general research in music education. The dataset, alongside | ||
embeddings of multiple dimensions of difficulty, has been made publicly available to encourage ongoing innovation | ||
and collaboration within the music education and research communities. | ||
The dataset has been published alongside a paper in Expert Systems with Applications Journal. | ||
The dataset is shared under a Creative Commons Attribution Non Commercial Share Alike 4.0 International License, but | ||
need to be requested. Please do request the dataset here: https://zenodo.org/records/8037327. The dataset can only | ||
be used for open research purposes. | ||
""" | ||
import json | ||
import logging | ||
import os | ||
from typing import Optional, List | ||
|
||
from smart_open import open | ||
|
||
|
||
from mirdata import core, jams_utils | ||
|
||
try: | ||
import music21 | ||
except ImportError: | ||
logging.error( | ||
"In order to use cipi you must have music21 installed. " | ||
"Please reinstall mirdata using `pip install 'mirdata[cipi]'" | ||
) | ||
raise ImportError | ||
|
||
BIBTEX = """ | ||
@article{Ramoneda2024, | ||
author = {Pedro Ramoneda and Dasaem Jeong and Vsevolod Eremenko and Nazif Can Tamer and Marius Miron and Xavier Serra}, | ||
title = {Combining Piano Performance Dimensions for Score Difficulty Classification}, | ||
journal = {Expert Systems with Applications}, | ||
volume = {238}, | ||
pages = {121776}, | ||
year = {2024}, | ||
doi = {10.1016/j.eswa.2023.121776}, | ||
url = {https://doi.org/10.1016/j.eswa.2023.121776} | ||
}""" | ||
|
||
INDEXES = { | ||
"default": "1.0", | ||
"test": "1.0", | ||
"1.0": core.Index(filename="cipi_index_1.0.json"), | ||
} | ||
|
||
LICENSE_INFO = ( | ||
"Creative Commons Attribution Non Commercial Share Alike 4.0 International." | ||
) | ||
|
||
DOWNLOAD_INFO = """ | ||
Unfortunately the files of the CIPI dataset are available | ||
for download upon request here: https://zenodo.org/records/8037327. | ||
After requesting the dataset, you will receive a link to download the | ||
dataset. You must download scores.zip, embeddings.zip and index.json | ||
copy the files into the folder: | ||
> cipi/ | ||
> index.json | ||
> embeddings.zip | ||
> scores.zip | ||
unzip embedding.zip and scores.zip and copy the CIPI folder to {} | ||
""" | ||
|
||
|
||
class Track(core.Track): | ||
"""Can I play it? (CIPI) track class | ||
Args: | ||
track_id (str): track id of the track | ||
Attributes: | ||
title (str): title of the track | ||
book (str): book of the track | ||
URI (str): URI of the track | ||
composer (str): name of the author of the track | ||
track_id (str): track id | ||
musicxml_paths (list): path to musicxml score. If the music piece contains multiple movents the list will contain multiple paths. | ||
difficulty_annotation (int): annotated difficulty | ||
fingering_path (tuple): Path of fingering features from technique dimension computed with ArGNN fingering model. Return of two paths, the right hand and the ones of the left hand. Use torch.load(...) for loading the embeddings. | ||
expressiveness_path (str): Path of expressiveness features from sound dimension computed with virtuosoNet model.Use torch.load(...) for loading the embeddings. | ||
notes_path (str): Path of note features from notation dimension. Use torch.load(...) for loading the embeddings. | ||
Cached Properties: | ||
scores (list[music21.stream.Score]): music21 scores. If the work is splited in several movements the list will contain multiple scores. | ||
""" | ||
|
||
def __init__(self, track_id, data_home, dataset_name, index, metadata): | ||
super().__init__(track_id, data_home, dataset_name, index, metadata) | ||
self._data_home = data_home | ||
self.fingering_path = ( | ||
self.get_path("rh_fingering"), | ||
self.get_path("lh_fingering"), | ||
) | ||
self.expressiveness_path = self.get_path("expressiveness") | ||
self.notes_path = self.get_path("notes") | ||
|
||
@property | ||
def title(self) -> Optional[str]: | ||
return ( | ||
self._track_metadata["work_name"] | ||
if "work_name" in self._track_metadata | ||
else None | ||
) | ||
|
||
@property | ||
def book(self) -> Optional[str]: | ||
return self._track_metadata["book"] if "book" in self._track_metadata else None | ||
|
||
@property | ||
def URI(self) -> Optional[str]: | ||
return self._track_metadata["URI"] if "URI" in self._track_metadata else None | ||
|
||
@property | ||
def composer(self) -> Optional[str]: | ||
return ( | ||
self._track_metadata["composer"] | ||
if "composer" in self._track_metadata | ||
else None | ||
) | ||
|
||
@property | ||
def musicxml_paths(self) -> List[str]: | ||
return ( | ||
list(self._track_metadata["path"].values()) | ||
if "path" in self._track_metadata | ||
else [] | ||
) | ||
|
||
@property | ||
def difficulty_annotation(self) -> int: | ||
return ( | ||
self._track_metadata["henle"] if "henle" in self._track_metadata else None | ||
) | ||
|
||
@core.cached_property | ||
def scores(self) -> list: | ||
try: | ||
scores = [load_score(path, self._data_home) for path in self.musicxml_paths] | ||
except FileNotFoundError: | ||
raise FileNotFoundError( | ||
"Some MusicXML files for track id {} not found. " | ||
"Did you request, download, and store the files as indicated?".format( | ||
self.track_id | ||
) | ||
) | ||
return scores | ||
|
||
def to_jams(self): | ||
"""Get the track's data in jams format | ||
Returns: | ||
jams.JAMS: the track's data in jams format | ||
""" | ||
return jams_utils.jams_converter( | ||
metadata={ | ||
"title": self.title, | ||
"artist": self.composer, | ||
"duration": 0.0, | ||
"book": self.book, | ||
"URI": self.URI, | ||
"composer": self.composer, | ||
"track_id": self.track_id, | ||
"musicxml_paths": self.musicxml_paths, | ||
"difficulty_annotation": self.difficulty_annotation, | ||
} | ||
) | ||
|
||
|
||
def load_score( | ||
fhandle: str, data_home: str = "tests/resources/mir_datasets/cipi" | ||
) -> music21.stream.Score: | ||
"""Load cipi score in music21 stream | ||
Args: | ||
fhandle (str): path to MusicXML score | ||
data_home (str): path to cipi dataset | ||
Returns: | ||
music21.stream.Score: score in music21 format | ||
""" | ||
try: | ||
score = music21.converter.parse(os.path.join(data_home, fhandle)) | ||
except: | ||
raise FileNotFoundError("File {} not found.".format(fhandle)) | ||
return score | ||
|
||
|
||
@core.docstring_inherit(core.Dataset) | ||
class Dataset(core.Dataset): | ||
""" | ||
The Can I play it? (CIPI) dataset | ||
""" | ||
|
||
def __init__(self, data_home=None, version="default"): | ||
super().__init__( | ||
data_home, | ||
version, | ||
name="cipi", | ||
track_class=Track, | ||
bibtex=BIBTEX, | ||
indexes=INDEXES, | ||
license_info=LICENSE_INFO, | ||
download_info=DOWNLOAD_INFO, | ||
) | ||
|
||
@core.cached_property | ||
def _metadata(self): | ||
metadata_path = os.path.join(self.data_home, "index.json") | ||
try: | ||
with open(metadata_path, "r") as fhandle: | ||
metadata_index = json.load(fhandle) | ||
except FileNotFoundError: | ||
raise FileNotFoundError( | ||
f"Metadata {metadata_path} not found. Did you download the files?" | ||
) | ||
return dict(metadata_index) |
Oops, something went wrong.