Skip to content

Commit

Permalink
Increase track attribute test coverage (#460)
Browse files Browse the repository at this point in the history
* adds stricter tests on track attributes and properties; fixes the track attribute and property bugs those tests revealed; lazy-loads metadata in track objects

* review comments

Co-authored-by: Rachel Bittner <[email protected]>
  • Loading branch information
rabitt and Rachel Bittner authored Feb 2, 2021
1 parent 54cbacb commit e4dc985
Show file tree
Hide file tree
Showing 44 changed files with 576 additions and 159 deletions.
19 changes: 13 additions & 6 deletions mirdata/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,11 @@ def _track(self, track_id):
raise NotImplementedError
else:
return self._track_class(
track_id, self.data_home, self.name, self._index, self._metadata
track_id,
self.data_home,
self.name,
self._index,
lambda: self._metadata,
)

def load_tracks(self):
Expand Down Expand Up @@ -330,13 +334,16 @@ def __init__(

self._data_home = data_home
self._track_paths = index["tracks"][track_id]
self._metadata = metadata

if metadata and track_id in metadata:
self._track_metadata = metadata[track_id]
@property
def _track_metadata(self):
metadata = self._metadata()
if metadata and self.track_id in metadata:
return metadata[self.track_id]
elif metadata:
self._track_metadata = metadata
else:
self._track_metadata = None
return metadata
return None

def __repr__(self):
properties = [v for v in dir(self.__class__) if not v.startswith("_")]
Expand Down
55 changes: 35 additions & 20 deletions mirdata/datasets/acousticbrainz_genre.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,18 @@ class Track(core.Track):
genre (list): human-labeled genre and subgenres list
mbid (str): musicbrainz id
mbid_group (str): musicbrainz id group
artist (list): the track's artist/s
title (list): the track's title
date (list): the track's release date/s
file_name (str): the track's file name
album (list): the track's album/s
tracknumber (list): the track number/s
tonal (dict): dictionary of acousticbrainz tonal features
low_level (dict): dictionary of acousticbrainz low-level features
rhythm (dict): dictionary of acousticbrainz rhythm features
Cached Properties:
acousticbrainz_metadata (dict): dictionary of metadata provided by AcousticBrainz
"""

Expand Down Expand Up @@ -189,30 +201,30 @@ def artist(self):
"""metadata artist annotation
Returns:
str: artist
list: artist
"""
return load_extractor(self.path)["metadata"]["artist"]
return self.acousticbrainz_metadata["metadata"]["tags"]["artist"]

@property
def title(self):
"""metadata title annotation
Returns:
str: title
list: title
"""
return load_extractor(self.path)["metadata"]["title"]
return self.acousticbrainz_metadata["metadata"]["tags"]["title"]

@property
def date(self):
"""metadata date annotation
Returns:
str: date
list: date
"""
return load_extractor(self.path)["metadata"]["date"]
return self.acousticbrainz_metadata["metadata"]["tags"]["date"]

@property
def file_name(self):
Expand All @@ -221,25 +233,25 @@ def file_name(self):
Returns:
str: file name
"""
return load_extractor(self.path)["metadata"]["file_name"]
return self.acousticbrainz_metadata["metadata"]["tags"]["file_name"]

@property
def album(self):
"""metadata album annotation
Returns:
str: album
list: album
"""
return load_extractor(self.path)["metadata"]["album"]
return self.acousticbrainz_metadata["metadata"]["tags"]["album"]

@property
def tracknumber(self):
"""metadata tracknumber annotation
Returns:
str: tracknumber
list: tracknumber
"""
return load_extractor(self.path)["metadata"]["tracknumber"]
return self.acousticbrainz_metadata["metadata"]["tags"]["tracknumber"]

@property
def tonal(self):
Expand All @@ -262,11 +274,11 @@ def tonal(self):
ChordsDescriptors
"""
return load_extractor(self.path)["tonal"]
return self.acousticbrainz_metadata["tonal"]

@property
def low_level(self):
"""low_level track descritors.
"""low_level track descriptors.
Returns:
dict:
Expand Down Expand Up @@ -309,7 +321,7 @@ def low_level(self):
SpectralContrast
"""
return load_extractor(self.path)["low_level"]
return self.acousticbrainz_metadata["lowlevel"]

@property
def rhythm(self):
Expand All @@ -332,7 +344,11 @@ def rhythm(self):
- 'onset_rate': number of detected onsets per second. Algorithms: OnsetRate
- 'danceability': danceability estimate. Algorithms: Danceability
"""
return load_extractor(self.path)["metadata"]["rhythm"]
return self.acousticbrainz_metadata["rhythm"]

@core.cached_property
def acousticbrainz_metadata(self):
return load_extractor(self.path)

def to_jams(self):
"""the track's data in jams format
Expand All @@ -344,9 +360,9 @@ def to_jams(self):
return jams_utils.jams_converter(
metadata={
"features": load_extractor(self.path),
"duration": load_extractor(self.path)["metadata"]["audio_properties"][
"length"
],
"duration": self.acousticbrainz_metadata["metadata"][
"audio_properties"
]["length"],
}
)

Expand All @@ -363,8 +379,7 @@ def load_extractor(fhandle):
* float - The sample rate of the audio file
"""
meta = json.load(fhandle)
return meta
return json.load(fhandle)


@core.docstring_inherit(core.Dataset)
Expand Down
24 changes: 19 additions & 5 deletions mirdata/datasets/cante100.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,25 @@ def __init__(
self.f0_path = os.path.join(self._data_home, self._track_paths["f0"][0])
self.notes_path = os.path.join(self._data_home, self._track_paths["notes"][0])

self.identifier = self._track_metadata.get("musicBrainzID")
self.artist = self._track_metadata.get("artist")
self.title = self._track_metadata.get("title")
self.release = self._track_metadata.get("release")
self.duration = self._track_metadata.get("duration")
@property
def identifier(self):
return self._track_metadata.get("musicBrainzID")

@property
def artist(self):
return self._track_metadata.get("artist")

@property
def title(self):
return self._track_metadata.get("title")

@property
def release(self):
return self._track_metadata.get("release")

@property
def duration(self):
return self._track_metadata.get("duration")

@property
def audio(self) -> Tuple[np.ndarray, float]:
Expand Down
59 changes: 47 additions & 12 deletions mirdata/datasets/dali.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,18 +131,53 @@ def __init__(
)
self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0])

self.audio_url = self._track_metadata.get("audio", {}).get("url")
self.url_working = self._track_metadata.get("audio", {}).get("working")
self.ground_truth = self._track_metadata.get("ground-truth")
self.artist = self._track_metadata.get("artist")
self.title = self._track_metadata.get("title")
self.dataset_version = self._track_metadata.get("dataset_version")
self.scores_ncc = self._track_metadata.get("scores", {}).get("NCC")
self.scores_manual = self._track_metadata.get("scores", {}).get("manual")
self.album = self._track_metadata.get("metadata", {}).get("album")
self.release_date = self._track_metadata.get("metadata", {}).get("release_date")
self.genres = self._track_metadata.get("metadata", {}).get("genres")
self.language = self._track_metadata.get("metadata", {}).get("language")
@property
def audio_url(self):
return self._track_metadata.get("audio", {}).get("url")

@property
def url_working(self):
return self._track_metadata.get("audio", {}).get("working")

@property
def ground_truth(self):
return self._track_metadata.get("ground-truth")

@property
def artist(self):
return self._track_metadata.get("artist")

@property
def title(self):
return self._track_metadata.get("title")

@property
def dataset_version(self):
return self._track_metadata.get("dataset_version")

@property
def scores_ncc(self):
return self._track_metadata.get("scores", {}).get("NCC")

@property
def scores_manual(self):
return self._track_metadata.get("scores", {}).get("manual")

@property
def album(self):
return self._track_metadata.get("metadata", {}).get("album")

@property
def release_date(self):
return self._track_metadata.get("metadata", {}).get("release_date")

@property
def genres(self):
return self._track_metadata.get("metadata", {}).get("genres")

@property
def language(self):
return self._track_metadata.get("metadata", {}).get("language")

@core.cached_property
def notes(self) -> annotations.NoteData:
Expand Down
51 changes: 40 additions & 11 deletions mirdata/datasets/groove_midi.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,23 +234,52 @@ def __init__(
metadata,
)

self.drummer = self._track_metadata.get("drummer")
self.session = self._track_metadata.get("session")
self.style = self._track_metadata.get("style")
self.tempo = self._track_metadata.get("tempo")
self.beat_type = self._track_metadata.get("beat_type")
self.time_signature = self._track_metadata.get("time_signature")
self.duration = self._track_metadata.get("duration")
self.split = self._track_metadata.get("split")
self.midi_filename = self._track_metadata.get("midi_filename")
self.audio_filename = self._track_metadata.get("audio_filename")

self.midi_path = os.path.join(self._data_home, self._track_paths["midi"][0])

self.audio_path = core.none_path_join(
[self._data_home, self._track_paths["audio"][0]]
)

@property
def drummer(self):
return self._track_metadata.get("drummer")

@property
def session(self):
return self._track_metadata.get("session")

@property
def style(self):
return self._track_metadata.get("style")

@property
def tempo(self):
return self._track_metadata.get("tempo")

@property
def beat_type(self):
return self._track_metadata.get("beat_type")

@property
def time_signature(self):
return self._track_metadata.get("time_signature")

@property
def duration(self):
return self._track_metadata.get("duration")

@property
def split(self):
return self._track_metadata.get("split")

@property
def midi_filename(self):
return self._track_metadata.get("midi_filename")

@property
def audio_filename(self):
return self._track_metadata.get("audio_filename")

@property
def audio(self) -> Tuple[Optional[np.ndarray], Optional[float]]:
"""The track's audio
Expand Down
5 changes: 4 additions & 1 deletion mirdata/datasets/ikala.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,10 @@ def __init__(
self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0])
self.song_id = track_id.split("_")[0]
self.section = track_id.split("_")[1]
self.singer_id = self._track_metadata.get(self.song_id)

@property
def singer_id(self):
return self._track_metadata.get(self.song_id)

@core.cached_property
def f0(self) -> Optional[annotations.F0Data]:
Expand Down
24 changes: 19 additions & 5 deletions mirdata/datasets/maestro.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,25 @@ def __init__(
self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0])
self.midi_path = os.path.join(self._data_home, self._track_paths["midi"][0])

self.canonical_composer = self._track_metadata.get("canonical_composer")
self.canonical_title = self._track_metadata.get("canonical_title")
self.split = self._track_metadata.get("split")
self.year = self._track_metadata.get("year")
self.duration = self._track_metadata.get("duration")
@property
def canonical_composer(self):
return self._track_metadata.get("canonical_composer")

@property
def canonical_title(self):
return self._track_metadata.get("canonical_title")

@property
def split(self):
return self._track_metadata.get("split")

@property
def year(self):
return self._track_metadata.get("year")

@property
def duration(self):
return self._track_metadata.get("duration")

@core.cached_property
def midi(self) -> Optional[pretty_midi.PrettyMIDI]:
Expand Down
Loading

0 comments on commit e4dc985

Please sign in to comment.