Skip to content

Commit

Permalink
Adding loader for CIPI dataset (#599)
Browse files Browse the repository at this point in the history
* scripts/make, index and track and dataset class. TODO tests

* fix docstring

* modify the docs

* download disclaimer

* black

* first test

* fix metadata

* remove embeddings

* add more tests

* black

* modify tests

* modify fix.py for adding music21 (optional)

* fix bugt with load_scores

* fix bugs

* from smart_open import open

* from smart_open import open

* same error than francesco

* test fulldataset

* test fulldataset

* test fulldataset

* genis suggestion

* replace os.path.exists by try catch

* fix plobles with try catch

* add cipi to CUSTOM_TEST_TRACKS

* modify all the tests

* black

* smart open test

* black

* check embeddings

* check embeddings

* check embeddings

* imrpoving codecov

* rollback haydn_op20.py

* rollback haydn_op20.py

* comentario de los embeddings

* cante100 -> cipi

* baclk

* expressiveness

* fix make

* Done!

* Update cipi.py

* difficulty annotation

* fix docs table

* add dataset details and fix error message

* now doing the fixes right :)

* address problem in table.rst

---------

Co-authored-by: PRamoneda <[email protected]>
Co-authored-by: Genís Plaja-Roglans <[email protected]>
Co-authored-by: Guillem Cortès <[email protected]>
Co-authored-by: genisplaja <[email protected]>
  • Loading branch information
5 people authored Nov 2, 2023
1 parent afbc0c3 commit c1dccb0
Show file tree
Hide file tree
Showing 17 changed files with 126,342 additions and 2 deletions.
8 changes: 8 additions & 0 deletions docs/source/mirdata.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ cante100
:inherited-members:


cipi
^^^^

.. automodule:: mirdata.datasets.cipi
:members:
:inherited-members:


compmusic_carnatic_rhythm
^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
9 changes: 9 additions & 0 deletions docs/source/table.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@
- 100
- :cante:`\ `

* - CIPI
- - musicXML: 🔑
- embeddings: 🔑
- annotations: 🔑
- - difficulty levels
- 652
- .. image:: https://licensebuttons.net/l/by-nc-sa/4.0/80x15.png
:target: https://creativecommons.org/licenses/by-nc-sa/4.0

* - .. line-block::

(CompMusic)
Expand Down
3 changes: 1 addition & 2 deletions mirdata/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,13 +529,12 @@ def __init__(self, track_id, data_home, dataset_name, index, metadata):
raise ValueError(
"{} is not a valid track_id in {}".format(track_id, dataset_name)
)

self._metadata = metadata
self.track_id = track_id
self._dataset_name = dataset_name

self._data_home = data_home
self._track_paths = index["tracks"][track_id]
self._metadata = metadata

@cached_property
def _track_metadata(self):
Expand Down
231 changes: 231 additions & 0 deletions mirdata/datasets/cipi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
"""Can I play it? (CIPI) Dataset Loader
.. admonition:: Dataset Info
:class: dropdown
The "Can I Play It?" (CIPI) dataset is a specialized collection of 652 classical piano scores, provided in a
machine-readable MusicXML format and accompanied by integer-based difficulty levels ranging from 1 to 9, as
verified by expert pianists. Then, it provides embeddings for fingering and expresiveness of the piece. Each
recording has multiple scores corresponding to it. This dataset focuses exclusively on classical piano music,
offering a rich resource for music researchers, educators, and students. Developed by the Music Technology Group
in Barcelona, by P. Ramoneda et al.
The CIPI dataset facilitates various applications such as the study of musical complexity, the selection of
appropriately leveled pieces for students, and general research in music education. The dataset, alongside
embeddings of multiple dimensions of difficulty, has been made publicly available to encourage ongoing innovation
and collaboration within the music education and research communities.
The dataset has been published alongside a paper in Expert Systems with Applications Journal.
The dataset is shared under a Creative Commons Attribution Non Commercial Share Alike 4.0 International License, but
need to be requested. Please do request the dataset here: https://zenodo.org/records/8037327. The dataset can only
be used for open research purposes.
"""
import json
import logging
import os
from typing import Optional, List

from smart_open import open


from mirdata import core, jams_utils

try:
import music21
except ImportError:
logging.error(
"In order to use cipi you must have music21 installed. "
"Please reinstall mirdata using `pip install 'mirdata[cipi]'"
)
raise ImportError

BIBTEX = """
@article{Ramoneda2024,
author = {Pedro Ramoneda and Dasaem Jeong and Vsevolod Eremenko and Nazif Can Tamer and Marius Miron and Xavier Serra},
title = {Combining Piano Performance Dimensions for Score Difficulty Classification},
journal = {Expert Systems with Applications},
volume = {238},
pages = {121776},
year = {2024},
doi = {10.1016/j.eswa.2023.121776},
url = {https://doi.org/10.1016/j.eswa.2023.121776}
}"""

INDEXES = {
"default": "1.0",
"test": "1.0",
"1.0": core.Index(filename="cipi_index_1.0.json"),
}

LICENSE_INFO = (
"Creative Commons Attribution Non Commercial Share Alike 4.0 International."
)

DOWNLOAD_INFO = """
Unfortunately the files of the CIPI dataset are available
for download upon request here: https://zenodo.org/records/8037327.
After requesting the dataset, you will receive a link to download the
dataset. You must download scores.zip, embeddings.zip and index.json
copy the files into the folder:
> cipi/
> index.json
> embeddings.zip
> scores.zip
unzip embedding.zip and scores.zip and copy the CIPI folder to {}
"""


class Track(core.Track):
"""Can I play it? (CIPI) track class
Args:
track_id (str): track id of the track
Attributes:
title (str): title of the track
book (str): book of the track
URI (str): URI of the track
composer (str): name of the author of the track
track_id (str): track id
musicxml_paths (list): path to musicxml score. If the music piece contains multiple movents the list will contain multiple paths.
difficulty_annotation (int): annotated difficulty
fingering_path (tuple): Path of fingering features from technique dimension computed with ArGNN fingering model. Return of two paths, the right hand and the ones of the left hand. Use torch.load(...) for loading the embeddings.
expressiveness_path (str): Path of expressiveness features from sound dimension computed with virtuosoNet model.Use torch.load(...) for loading the embeddings.
notes_path (str): Path of note features from notation dimension. Use torch.load(...) for loading the embeddings.
Cached Properties:
scores (list[music21.stream.Score]): music21 scores. If the work is splited in several movements the list will contain multiple scores.
"""

def __init__(self, track_id, data_home, dataset_name, index, metadata):
super().__init__(track_id, data_home, dataset_name, index, metadata)
self._data_home = data_home
self.fingering_path = (
self.get_path("rh_fingering"),
self.get_path("lh_fingering"),
)
self.expressiveness_path = self.get_path("expressiveness")
self.notes_path = self.get_path("notes")

@property
def title(self) -> Optional[str]:
return (
self._track_metadata["work_name"]
if "work_name" in self._track_metadata
else None
)

@property
def book(self) -> Optional[str]:
return self._track_metadata["book"] if "book" in self._track_metadata else None

@property
def URI(self) -> Optional[str]:
return self._track_metadata["URI"] if "URI" in self._track_metadata else None

@property
def composer(self) -> Optional[str]:
return (
self._track_metadata["composer"]
if "composer" in self._track_metadata
else None
)

@property
def musicxml_paths(self) -> List[str]:
return (
list(self._track_metadata["path"].values())
if "path" in self._track_metadata
else []
)

@property
def difficulty_annotation(self) -> int:
return (
self._track_metadata["henle"] if "henle" in self._track_metadata else None
)

@core.cached_property
def scores(self) -> list:
try:
scores = [load_score(path, self._data_home) for path in self.musicxml_paths]
except FileNotFoundError:
raise FileNotFoundError(
"Some MusicXML files for track id {} not found. "
"Did you request, download, and store the files as indicated?".format(
self.track_id
)
)
return scores

def to_jams(self):
"""Get the track's data in jams format
Returns:
jams.JAMS: the track's data in jams format
"""
return jams_utils.jams_converter(
metadata={
"title": self.title,
"artist": self.composer,
"duration": 0.0,
"book": self.book,
"URI": self.URI,
"composer": self.composer,
"track_id": self.track_id,
"musicxml_paths": self.musicxml_paths,
"difficulty_annotation": self.difficulty_annotation,
}
)


def load_score(
fhandle: str, data_home: str = "tests/resources/mir_datasets/cipi"
) -> music21.stream.Score:
"""Load cipi score in music21 stream
Args:
fhandle (str): path to MusicXML score
data_home (str): path to cipi dataset
Returns:
music21.stream.Score: score in music21 format
"""
try:
score = music21.converter.parse(os.path.join(data_home, fhandle))
except:
raise FileNotFoundError("File {} not found.".format(fhandle))
return score


@core.docstring_inherit(core.Dataset)
class Dataset(core.Dataset):
"""
The Can I play it? (CIPI) dataset
"""

def __init__(self, data_home=None, version="default"):
super().__init__(
data_home,
version,
name="cipi",
track_class=Track,
bibtex=BIBTEX,
indexes=INDEXES,
license_info=LICENSE_INFO,
download_info=DOWNLOAD_INFO,
)

@core.cached_property
def _metadata(self):
metadata_path = os.path.join(self.data_home, "index.json")
try:
with open(metadata_path, "r") as fhandle:
metadata_index = json.load(fhandle)
except FileNotFoundError:
raise FileNotFoundError(
f"Metadata {metadata_path} not found. Did you download the files?"
)
return dict(metadata_index)
Loading

0 comments on commit c1dccb0

Please sign in to comment.