diff --git a/mirdata/core.py b/mirdata/core.py index a0fef7c3e..71a59df3b 100644 --- a/mirdata/core.py +++ b/mirdata/core.py @@ -196,7 +196,11 @@ def _track(self, track_id): raise NotImplementedError else: return self._track_class( - track_id, self.data_home, self.name, self._index, self._metadata + track_id, + self.data_home, + self.name, + self._index, + lambda: self._metadata, ) def load_tracks(self): @@ -330,13 +334,16 @@ def __init__( self._data_home = data_home self._track_paths = index["tracks"][track_id] + self._metadata = metadata - if metadata and track_id in metadata: - self._track_metadata = metadata[track_id] + @property + def _track_metadata(self): + metadata = self._metadata() + if metadata and self.track_id in metadata: + return metadata[self.track_id] elif metadata: - self._track_metadata = metadata - else: - self._track_metadata = None + return metadata + return None def __repr__(self): properties = [v for v in dir(self.__class__) if not v.startswith("_")] diff --git a/mirdata/datasets/acousticbrainz_genre.py b/mirdata/datasets/acousticbrainz_genre.py index a98ece7b3..44220c273 100644 --- a/mirdata/datasets/acousticbrainz_genre.py +++ b/mirdata/datasets/acousticbrainz_genre.py @@ -158,6 +158,18 @@ class Track(core.Track): genre (list): human-labeled genre and subgenres list mbid (str): musicbrainz id mbid_group (str): musicbrainz id group + artist (list): the track's artist/s + title (list): the track's title + date (list): the track's release date/s + filename (str): the track's filename + album (list): the track's album/s + track_number (list): the track number/s + tonal (dict): dictionary of acousticbrainz tonal features + low_level (dict): dictionary of acousticbrainz low-level features + rhythm (dict): dictionary of acousticbrainz rhythm features + + Cached Properties: + acousticbrainz_metadata (dict): dictionary of metadata provided by AcousticBrainz """ @@ -189,30 +201,30 @@ def artist(self): """metadata artist annotation Returns: - str: artist + list: artist """ - return load_extractor(self.path)["metadata"]["artist"] + return self.acousticbrainz_metadata["metadata"]["tags"]["artist"] @property def title(self): """metadata title annotation Returns: - str: title + list: title """ - return load_extractor(self.path)["metadata"]["title"] + return self.acousticbrainz_metadata["metadata"]["tags"]["title"] @property def date(self): """metadata date annotation Returns: - str: date + list: date """ - return load_extractor(self.path)["metadata"]["date"] + return self.acousticbrainz_metadata["metadata"]["tags"]["date"] @property def file_name(self): @@ -221,25 +233,25 @@ def file_name(self): Returns: str: file name """ - return load_extractor(self.path)["metadata"]["file_name"] + return self.acousticbrainz_metadata["metadata"]["tags"]["file_name"] @property def album(self): """metadata album annotation Returns: - str: album + list: album """ - return load_extractor(self.path)["metadata"]["album"] + return self.acousticbrainz_metadata["metadata"]["tags"]["album"] @property def tracknumber(self): """metadata tracknumber annotation Returns: - str: tracknumber + list: tracknumber """ - return load_extractor(self.path)["metadata"]["tracknumber"] + return self.acousticbrainz_metadata["metadata"]["tags"]["tracknumber"] @property def tonal(self): @@ -262,11 +274,11 @@ def tonal(self): ChordsDescriptors """ - return load_extractor(self.path)["tonal"] + return self.acousticbrainz_metadata["tonal"] @property def low_level(self): - """low_level track descritors. + """low_level track descriptors. Returns: dict: @@ -309,7 +321,7 @@ def low_level(self): SpectralContrast """ - return load_extractor(self.path)["low_level"] + return self.acousticbrainz_metadata["lowlevel"] @property def rhythm(self): @@ -332,7 +344,11 @@ def rhythm(self): - 'onset_rate': number of detected onsets per second. Algorithms: OnsetRate - 'danceability': danceability estimate. Algorithms: Danceability """ - return load_extractor(self.path)["metadata"]["rhythm"] + return self.acousticbrainz_metadata["rhythm"] + + @core.cached_property + def acousticbrainz_metadata(self): + return load_extractor(self.path) def to_jams(self): """the track's data in jams format @@ -344,9 +360,9 @@ def to_jams(self): return jams_utils.jams_converter( metadata={ "features": load_extractor(self.path), - "duration": load_extractor(self.path)["metadata"]["audio_properties"][ - "length" - ], + "duration": self.acousticbrainz_metadata["metadata"][ + "audio_properties" + ]["length"], } ) @@ -363,8 +379,7 @@ def load_extractor(fhandle): * float - The sample rate of the audio file """ - meta = json.load(fhandle) - return meta + return json.load(fhandle) @core.docstring_inherit(core.Dataset) diff --git a/mirdata/datasets/cante100.py b/mirdata/datasets/cante100.py index cbab51a0d..d526cb50d 100644 --- a/mirdata/datasets/cante100.py +++ b/mirdata/datasets/cante100.py @@ -188,11 +188,25 @@ def __init__( self.f0_path = os.path.join(self._data_home, self._track_paths["f0"][0]) self.notes_path = os.path.join(self._data_home, self._track_paths["notes"][0]) - self.identifier = self._track_metadata.get("musicBrainzID") - self.artist = self._track_metadata.get("artist") - self.title = self._track_metadata.get("title") - self.release = self._track_metadata.get("release") - self.duration = self._track_metadata.get("duration") + @property + def identifier(self): + return self._track_metadata.get("musicBrainzID") + + @property + def artist(self): + return self._track_metadata.get("artist") + + @property + def title(self): + return self._track_metadata.get("title") + + @property + def release(self): + return self._track_metadata.get("release") + + @property + def duration(self): + return self._track_metadata.get("duration") @property def audio(self) -> Tuple[np.ndarray, float]: diff --git a/mirdata/datasets/dali.py b/mirdata/datasets/dali.py index 76da4b54a..9656570c9 100644 --- a/mirdata/datasets/dali.py +++ b/mirdata/datasets/dali.py @@ -131,18 +131,53 @@ def __init__( ) self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0]) - self.audio_url = self._track_metadata.get("audio", {}).get("url") - self.url_working = self._track_metadata.get("audio", {}).get("working") - self.ground_truth = self._track_metadata.get("ground-truth") - self.artist = self._track_metadata.get("artist") - self.title = self._track_metadata.get("title") - self.dataset_version = self._track_metadata.get("dataset_version") - self.scores_ncc = self._track_metadata.get("scores", {}).get("NCC") - self.scores_manual = self._track_metadata.get("scores", {}).get("manual") - self.album = self._track_metadata.get("metadata", {}).get("album") - self.release_date = self._track_metadata.get("metadata", {}).get("release_date") - self.genres = self._track_metadata.get("metadata", {}).get("genres") - self.language = self._track_metadata.get("metadata", {}).get("language") + @property + def audio_url(self): + return self._track_metadata.get("audio", {}).get("url") + + @property + def url_working(self): + return self._track_metadata.get("audio", {}).get("working") + + @property + def ground_truth(self): + return self._track_metadata.get("ground-truth") + + @property + def artist(self): + return self._track_metadata.get("artist") + + @property + def title(self): + return self._track_metadata.get("title") + + @property + def dataset_version(self): + return self._track_metadata.get("dataset_version") + + @property + def scores_ncc(self): + return self._track_metadata.get("scores", {}).get("NCC") + + @property + def scores_manual(self): + return self._track_metadata.get("scores", {}).get("manual") + + @property + def album(self): + return self._track_metadata.get("metadata", {}).get("album") + + @property + def release_date(self): + return self._track_metadata.get("metadata", {}).get("release_date") + + @property + def genres(self): + return self._track_metadata.get("metadata", {}).get("genres") + + @property + def language(self): + return self._track_metadata.get("metadata", {}).get("language") @core.cached_property def notes(self) -> annotations.NoteData: diff --git a/mirdata/datasets/groove_midi.py b/mirdata/datasets/groove_midi.py index 4293d49ef..1733ce27d 100644 --- a/mirdata/datasets/groove_midi.py +++ b/mirdata/datasets/groove_midi.py @@ -234,23 +234,52 @@ def __init__( metadata, ) - self.drummer = self._track_metadata.get("drummer") - self.session = self._track_metadata.get("session") - self.style = self._track_metadata.get("style") - self.tempo = self._track_metadata.get("tempo") - self.beat_type = self._track_metadata.get("beat_type") - self.time_signature = self._track_metadata.get("time_signature") - self.duration = self._track_metadata.get("duration") - self.split = self._track_metadata.get("split") - self.midi_filename = self._track_metadata.get("midi_filename") - self.audio_filename = self._track_metadata.get("audio_filename") - self.midi_path = os.path.join(self._data_home, self._track_paths["midi"][0]) self.audio_path = core.none_path_join( [self._data_home, self._track_paths["audio"][0]] ) + @property + def drummer(self): + return self._track_metadata.get("drummer") + + @property + def session(self): + return self._track_metadata.get("session") + + @property + def style(self): + return self._track_metadata.get("style") + + @property + def tempo(self): + return self._track_metadata.get("tempo") + + @property + def beat_type(self): + return self._track_metadata.get("beat_type") + + @property + def time_signature(self): + return self._track_metadata.get("time_signature") + + @property + def duration(self): + return self._track_metadata.get("duration") + + @property + def split(self): + return self._track_metadata.get("split") + + @property + def midi_filename(self): + return self._track_metadata.get("midi_filename") + + @property + def audio_filename(self): + return self._track_metadata.get("audio_filename") + @property def audio(self) -> Tuple[Optional[np.ndarray], Optional[float]]: """The track's audio diff --git a/mirdata/datasets/ikala.py b/mirdata/datasets/ikala.py index f9f3e8d49..fc5e58f50 100644 --- a/mirdata/datasets/ikala.py +++ b/mirdata/datasets/ikala.py @@ -105,7 +105,10 @@ def __init__( self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0]) self.song_id = track_id.split("_")[0] self.section = track_id.split("_")[1] - self.singer_id = self._track_metadata.get(self.song_id) + + @property + def singer_id(self): + return self._track_metadata.get(self.song_id) @core.cached_property def f0(self) -> Optional[annotations.F0Data]: diff --git a/mirdata/datasets/maestro.py b/mirdata/datasets/maestro.py index 962897fd2..0afb823e8 100644 --- a/mirdata/datasets/maestro.py +++ b/mirdata/datasets/maestro.py @@ -126,11 +126,25 @@ def __init__( self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0]) self.midi_path = os.path.join(self._data_home, self._track_paths["midi"][0]) - self.canonical_composer = self._track_metadata.get("canonical_composer") - self.canonical_title = self._track_metadata.get("canonical_title") - self.split = self._track_metadata.get("split") - self.year = self._track_metadata.get("year") - self.duration = self._track_metadata.get("duration") + @property + def canonical_composer(self): + return self._track_metadata.get("canonical_composer") + + @property + def canonical_title(self): + return self._track_metadata.get("canonical_title") + + @property + def split(self): + return self._track_metadata.get("split") + + @property + def year(self): + return self._track_metadata.get("year") + + @property + def duration(self): + return self._track_metadata.get("duration") @core.cached_property def midi(self) -> Optional[pretty_midi.PrettyMIDI]: diff --git a/mirdata/datasets/medley_solos_db.py b/mirdata/datasets/medley_solos_db.py index cf992252d..0316ae38f 100644 --- a/mirdata/datasets/medley_solos_db.py +++ b/mirdata/datasets/medley_solos_db.py @@ -96,10 +96,22 @@ def __init__( ) self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0]) - self.instrument = self._track_metadata.get("instrument") - self.instrument_id = self._track_metadata.get("instrument_id") - self.song_id = self._track_metadata.get("song_id") - self.subset = self._track_metadata.get("subset") + + @property + def instrument(self): + return self._track_metadata.get("instrument") + + @property + def instrument_id(self): + return self._track_metadata.get("instrument_id") + + @property + def song_id(self): + return self._track_metadata.get("song_id") + + @property + def subset(self): + return self._track_metadata.get("subset") @property def audio(self) -> Optional[Tuple[np.ndarray, float]]: diff --git a/mirdata/datasets/medleydb_melody.py b/mirdata/datasets/medleydb_melody.py index da5c47474..ec3b84d42 100644 --- a/mirdata/datasets/medleydb_melody.py +++ b/mirdata/datasets/medleydb_melody.py @@ -106,12 +106,30 @@ def __init__( ) self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0]) - self.artist = self._track_metadata.get("artist") - self.title = self._track_metadata.get("title") - self.genre = self._track_metadata.get("genre") - self.is_excerpt = self._track_metadata.get("is_excerpt") - self.is_instrumental = self._track_metadata.get("is_instrumental") - self.n_sources = self._track_metadata.get("n_sources") + + @property + def artist(self): + return self._track_metadata.get("artist") + + @property + def title(self): + return self._track_metadata.get("title") + + @property + def genre(self): + return self._track_metadata.get("genre") + + @property + def is_excerpt(self): + return self._track_metadata.get("is_excerpt") + + @property + def is_instrumental(self): + return self._track_metadata.get("is_instrumental") + + @property + def n_sources(self): + return self._track_metadata.get("n_sources") @core.cached_property def melody1(self) -> Optional[annotations.F0Data]: diff --git a/mirdata/datasets/medleydb_pitch.py b/mirdata/datasets/medleydb_pitch.py index f07bdda2b..d63584eef 100644 --- a/mirdata/datasets/medleydb_pitch.py +++ b/mirdata/datasets/medleydb_pitch.py @@ -93,10 +93,22 @@ def __init__( self.pitch_path = os.path.join(self._data_home, self._track_paths["pitch"][0]) self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0]) - self.instrument = self._track_metadata.get("instrument") - self.artist = self._track_metadata.get("artist") - self.title = self._track_metadata.get("title") - self.genre = self._track_metadata.get("genre") + + @property + def instrument(self): + return self._track_metadata.get("instrument") + + @property + def artist(self): + return self._track_metadata.get("artist") + + @property + def title(self): + return self._track_metadata.get("title") + + @property + def genre(self): + return self._track_metadata.get("genre") @core.cached_property def pitch(self) -> Optional[annotations.F0Data]: diff --git a/mirdata/datasets/orchset.py b/mirdata/datasets/orchset.py index b9ad01816..1136bc8ea 100644 --- a/mirdata/datasets/orchset.py +++ b/mirdata/datasets/orchset.py @@ -103,19 +103,50 @@ def __init__( self.audio_path_stereo = os.path.join( self._data_home, self._track_paths["audio_stereo"][0] ) - self.composer = self._track_metadata.get("composer") - self.work = self._track_metadata.get("work") - self.excerpt = self._track_metadata.get("excerpt") - self.predominant_melodic_instruments = self._track_metadata.get( - "predominant_melodic_instruments-normalized" - ) - self.alternating_melody = self._track_metadata.get("alternating_melody") - self.contains_winds = self._track_metadata.get("contains_winds") - self.contains_strings = self._track_metadata.get("contains_strings") - self.contains_brass = self._track_metadata.get("contains_brass") - self.only_strings = self._track_metadata.get("only_strings") - self.only_winds = self._track_metadata.get("only_winds") - self.only_brass = self._track_metadata.get("only_brass") + + @property + def composer(self): + return self._track_metadata.get("composer") + + @property + def work(self): + return self._track_metadata.get("work") + + @property + def excerpt(self): + return self._track_metadata.get("excerpt") + + @property + def predominant_melodic_instruments(self): + return self._track_metadata.get("predominant_melodic_instruments-normalized") + + @property + def alternating_melody(self): + return self._track_metadata.get("alternating_melody") + + @property + def contains_winds(self): + return self._track_metadata.get("contains_winds") + + @property + def contains_strings(self): + return self._track_metadata.get("contains_strings") + + @property + def contains_brass(self): + return self._track_metadata.get("contains_brass") + + @property + def only_strings(self): + return self._track_metadata.get("only_strings") + + @property + def only_winds(self): + return self._track_metadata.get("only_winds") + + @property + def only_brass(self): + return self._track_metadata.get("only_brass") @core.cached_property def melody(self) -> Optional[annotations.F0Data]: diff --git a/mirdata/datasets/rwc_classical.py b/mirdata/datasets/rwc_classical.py index 55f012c23..a7602eaf7 100644 --- a/mirdata/datasets/rwc_classical.py +++ b/mirdata/datasets/rwc_classical.py @@ -163,14 +163,37 @@ def __init__( self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0]) - self.piece_number = self._track_metadata.get("piece_number") - self.suffix = self._track_metadata.get("suffix") - self.track_number = self._track_metadata.get("track_number") - self.title = self._track_metadata.get("title") - self.composer = self._track_metadata.get("composer") - self.artist = self._track_metadata.get("artist") - self.duration = self._track_metadata.get("duration") - self.category = self._track_metadata.get("category") + @property + def piece_number(self): + return self._track_metadata.get("piece_number") + + @property + def suffix(self): + return self._track_metadata.get("suffix") + + @property + def track_number(self): + return self._track_metadata.get("track_number") + + @property + def title(self): + return self._track_metadata.get("title") + + @property + def composer(self): + return self._track_metadata.get("composer") + + @property + def artist(self): + return self._track_metadata.get("artist") + + @property + def duration(self): + return self._track_metadata.get("duration") + + @property + def category(self): + return self._track_metadata.get("category") @core.cached_property def sections(self) -> Optional[annotations.SectionData]: diff --git a/mirdata/datasets/rwc_jazz.py b/mirdata/datasets/rwc_jazz.py index af91a6315..d0f262e7f 100644 --- a/mirdata/datasets/rwc_jazz.py +++ b/mirdata/datasets/rwc_jazz.py @@ -149,14 +149,37 @@ def __init__( self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0]) - self.piece_number = self._track_metadata.get("piece_number") - self.suffix = self._track_metadata.get("suffix") - self.track_number = self._track_metadata.get("track_number") - self.title = self._track_metadata.get("title") - self.artist = self._track_metadata.get("artist") - self.duration = self._track_metadata.get("duration") - self.variation = self._track_metadata.get("variation") - self.instruments = self._track_metadata.get("instruments") + @property + def piece_number(self): + return self._track_metadata.get("piece_number") + + @property + def suffix(self): + return self._track_metadata.get("suffix") + + @property + def track_number(self): + return self._track_metadata.get("track_number") + + @property + def title(self): + return self._track_metadata.get("title") + + @property + def artist(self): + return self._track_metadata.get("artist") + + @property + def duration(self): + return self._track_metadata.get("duration") + + @property + def variation(self): + return self._track_metadata.get("variation") + + @property + def instruments(self): + return self._track_metadata.get("instruments") @core.cached_property def sections(self) -> Optional[annotations.SectionData]: diff --git a/mirdata/datasets/rwc_popular.py b/mirdata/datasets/rwc_popular.py index 6a4a567e4..6ce12f3c8 100644 --- a/mirdata/datasets/rwc_popular.py +++ b/mirdata/datasets/rwc_popular.py @@ -161,16 +161,45 @@ def __init__( self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0]) - self.piece_number = self._track_metadata.get("piece_number") - self.suffix = self._track_metadata.get("suffix") - self.track_number = self._track_metadata.get("track_number") - self.title = self._track_metadata.get("title") - self.artist = self._track_metadata.get("artist") - self.singer_information = self._track_metadata.get("singer_information") - self.duration = self._track_metadata.get("duration") - self.tempo = self._track_metadata.get("tempo") - self.instruments = self._track_metadata.get("instruments") - self.drum_information = self._track_metadata.get("drum_information") + @property + def piece_number(self): + return self._track_metadata.get("piece_number") + + @property + def suffix(self): + return self._track_metadata.get("suffix") + + @property + def track_number(self): + return self._track_metadata.get("track_number") + + @property + def title(self): + return self._track_metadata.get("title") + + @property + def artist(self): + return self._track_metadata.get("artist") + + @property + def singer_information(self): + return self._track_metadata.get("singer_information") + + @property + def duration(self): + return self._track_metadata.get("duration") + + @property + def tempo(self): + return self._track_metadata.get("tempo") + + @property + def instruments(self): + return self._track_metadata.get("instruments") + + @property + def drum_information(self): + return self._track_metadata.get("drum_information") @core.cached_property def sections(self) -> Optional[annotations.SectionData]: diff --git a/mirdata/datasets/salami.py b/mirdata/datasets/salami.py index 592a7a61a..03962b56f 100644 --- a/mirdata/datasets/salami.py +++ b/mirdata/datasets/salami.py @@ -120,16 +120,46 @@ def __init__( ) self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0]) - self.source = self._track_metadata.get("source") - self.annotator_1_id = self._track_metadata.get("annotator_1_id") - self.annotator_2_id = self._track_metadata.get("annotator_2_id") - self.duration = self._track_metadata.get("duration") - self.title = self._track_metadata.get("title") - self.artist = self._track_metadata.get("artist") - self.annotator_1_time = self._track_metadata.get("annotator_1_time") - self.annotator_2_time = self._track_metadata.get("annotator_2_time") - self.broad_genre = self._track_metadata.get("class") - self.genre = self._track_metadata.get("genre") + + @property + def source(self): + return self._track_metadata.get("source") + + @property + def annotator_1_id(self): + return self._track_metadata.get("annotator_1_id") + + @property + def annotator_2_id(self): + return self._track_metadata.get("annotator_2_id") + + @property + def duration(self): + return self._track_metadata.get("duration") + + @property + def title(self): + return self._track_metadata.get("title") + + @property + def artist(self): + return self._track_metadata.get("artist") + + @property + def annotator_1_time(self): + return self._track_metadata.get("annotator_1_time") + + @property + def annotator_2_time(self): + return self._track_metadata.get("annotator_2_time") + + @property + def broad_genre(self): + return self._track_metadata.get("class") + + @property + def genre(self): + return self._track_metadata.get("genre") @core.cached_property def sections_annotator_1_uppercase(self) -> Optional[annotations.SectionData]: diff --git a/mirdata/datasets/tinysol.py b/mirdata/datasets/tinysol.py index 56189d654..a84dd80aa 100644 --- a/mirdata/datasets/tinysol.py +++ b/mirdata/datasets/tinysol.py @@ -127,18 +127,53 @@ def __init__( self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0]) - self.family = self._track_metadata.get("Family") - self.instrument_abbr = self._track_metadata.get("Instrument (abbr.)") - self.instrument_full = self._track_metadata.get("Instrument (in full)") - self.technique_abbr = self._track_metadata.get("Technique (abbr.)") - self.technique_full = self._track_metadata.get("Technique (in full)") - self.pitch = self._track_metadata.get("Pitch") - self.pitch_id = self._track_metadata.get("Pitch ID") - self.dynamics = self._track_metadata.get("Dynamics") - self.dynamics_id = self._track_metadata.get("Dynamics ID") - self.instance_id = self._track_metadata.get("Instance ID") - self.string_id = self._track_metadata.get("String ID") - self.is_resampled = self._track_metadata.get("Resampled") + @property + def family(self): + return self._track_metadata.get("Family") + + @property + def instrument_abbr(self): + return self._track_metadata.get("Instrument (abbr.)") + + @property + def instrument_full(self): + return self._track_metadata.get("Instrument (in full)") + + @property + def technique_abbr(self): + return self._track_metadata.get("Technique (abbr.)") + + @property + def technique_full(self): + return self._track_metadata.get("Technique (in full)") + + @property + def pitch(self): + return self._track_metadata.get("Pitch") + + @property + def pitch_id(self): + return self._track_metadata.get("Pitch ID") + + @property + def dynamics(self): + return self._track_metadata.get("Dynamics") + + @property + def dynamics_id(self): + return self._track_metadata.get("Dynamics ID") + + @property + def instance_id(self): + return self._track_metadata.get("Instance ID") + + @property + def string_id(self): + return self._track_metadata.get("String ID") + + @property + def is_resampled(self): + return self._track_metadata.get("Resampled") @property def audio(self) -> Optional[Tuple[np.ndarray, float]]: diff --git a/tests/test_acousticbrainz_genre.py b/tests/test_acousticbrainz_genre.py index 552d6e35d..7e404ad25 100644 --- a/tests/test_acousticbrainz_genre.py +++ b/tests/test_acousticbrainz_genre.py @@ -1,7 +1,7 @@ import os import shutil -from mirdata import jams_utils, download_utils, core +from mirdata import download_utils from mirdata.datasets import acousticbrainz_genre from tests.test_utils import run_track_tests @@ -23,15 +23,16 @@ def test_track(): } expected_property_types = { - "artist": str, - "title": str, - "date": str, + "artist": list, + "title": list, + "date": list, "file_name": str, - "album": str, - "tracknumber": str, + "album": list, + "tracknumber": list, "tonal": dict, "low_level": dict, "rhythm": dict, + "acousticbrainz_metadata": dict, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_beatles.py b/tests/test_beatles.py index 232c3a0e2..4ca083047 100644 --- a/tests/test_beatles.py +++ b/tests/test_beatles.py @@ -31,6 +31,7 @@ def test_track(): "chords": annotations.ChordData, "key": annotations.KeyData, "sections": annotations.SectionData, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_beatport_key.py b/tests/test_beatport_key.py index df5819cd2..b5a29c13c 100644 --- a/tests/test_beatport_key.py +++ b/tests/test_beatport_key.py @@ -23,6 +23,7 @@ def test_track(): "genres": dict, "artists": list, "tempo": int, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_cante100.py b/tests/test_cante100.py index 63b046ee0..290b170fd 100644 --- a/tests/test_cante100.py +++ b/tests/test_cante100.py @@ -35,6 +35,8 @@ def test_track(): expected_property_types = { "melody": annotations.F0Data, "notes": annotations.NoteData, + "audio": tuple, + "spectrogram": np.ndarray, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_dali.py b/tests/test_dali.py index f39cb934f..89c4aef30 100644 --- a/tests/test_dali.py +++ b/tests/test_dali.py @@ -39,6 +39,7 @@ def test_track(): "lines": annotations.LyricData, "paragraphs": annotations.LyricData, "annotation_object": DALI.Annotations, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_giantsteps_key.py b/tests/test_giantsteps_key.py index dd8ce049e..94f95a18b 100644 --- a/tests/test_giantsteps_key.py +++ b/tests/test_giantsteps_key.py @@ -26,6 +26,7 @@ def test_track(): "genres": dict, "artists": list, "tempo": int, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_giantsteps_tempo.py b/tests/test_giantsteps_tempo.py index 1b96e73fe..c31498038 100644 --- a/tests/test_giantsteps_tempo.py +++ b/tests/test_giantsteps_tempo.py @@ -25,6 +25,7 @@ def test_track(): "tempo": annotations.TempoData, "tempo_v2": annotations.TempoData, "genre": str, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_groove_midi.py b/tests/test_groove_midi.py index 0b5c8029f..2f227ffff 100644 --- a/tests/test_groove_midi.py +++ b/tests/test_groove_midi.py @@ -37,6 +37,7 @@ def test_track(): "beats": annotations.BeatData, "drum_events": annotations.EventData, "midi": pretty_midi.PrettyMIDI, + "audio": tuple, } assert track._track_paths == { diff --git a/tests/test_gtzan_genre.py b/tests/test_gtzan_genre.py index 83055057c..9be9f210e 100644 --- a/tests/test_gtzan_genre.py +++ b/tests/test_gtzan_genre.py @@ -17,7 +17,8 @@ def test_track(): + "gtzan_genre/genres/country/country.00000.wav", "track_id": "country.00000", } - run_track_tests(track, expected_attributes, {}) + expected_properties = {"audio": tuple} + run_track_tests(track, expected_attributes, expected_properties) audio, sr = track.audio assert sr == 22050 diff --git a/tests/test_guitarset.py b/tests/test_guitarset.py index 25d3e32ca..1665b7f8e 100644 --- a/tests/test_guitarset.py +++ b/tests/test_guitarset.py @@ -38,6 +38,10 @@ def test_track(): "key_mode": annotations.KeyData, "pitch_contours": dict, "notes": dict, + "audio_mic": tuple, + "audio_mix": tuple, + "audio_hex": tuple, + "audio_hex_cln": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_ikala.py b/tests/test_ikala.py index c5566722a..65d7f5b27 100644 --- a/tests/test_ikala.py +++ b/tests/test_ikala.py @@ -25,6 +25,9 @@ def test_track(): expected_property_types = { "f0": annotations.F0Data, "lyrics": annotations.LyricData, + "vocal_audio": tuple, + "instrumental_audio": tuple, + "mix_audio": tuple, } assert track._track_paths == { diff --git a/tests/test_irmas.py b/tests/test_irmas.py index efc575ef0..7e1543092 100644 --- a/tests/test_irmas.py +++ b/tests/test_irmas.py @@ -34,7 +34,10 @@ def test_track(): "train": True, } - expected_property_test_types = {"instrument": list} + expected_property_test_types = { + "instrument": list, + "audio": tuple, + } run_track_tests(track, expected_attributes, expected_property_test_types) run_track_tests( diff --git a/tests/test_loaders.py b/tests/test_loaders.py index 1b2e2af9d..bb83220f4 100644 --- a/tests/test_loaders.py +++ b/tests/test_loaders.py @@ -10,7 +10,7 @@ import mirdata from mirdata import core, download_utils -from tests.test_utils import DEFAULT_DATA_HOME +from tests.test_utils import DEFAULT_DATA_HOME, get_attributes_and_properties DATASETS = mirdata.DATASETS CUSTOM_TEST_TRACKS = { @@ -19,6 +19,7 @@ "giantsteps_key": "3", "dali": "4b196e6c99574dd49ad00d56e132712b", "giantsteps_tempo": "113", + "gtzan_genre": "country.00000", "guitarset": "03_BN3-119-G_solo", "irmas": "1", "medley_solos_db": "d07b1fc0-567d-52c2-fef4-239f31c9d40e", @@ -255,6 +256,18 @@ def test_track(): track_test, "to_jams" ), "{}.track must have a to_jams method".format(dataset_name) + # test calling all attributes, properties and cached properties + track_data = get_attributes_and_properties(track_test) + + for attr in track_data["attributes"]: + ret = getattr(track_test, attr) + + for prop in track_data["properties"]: + ret = getattr(track_test, prop) + + for cprop in track_data["cached_properties"]: + ret = getattr(track_test, cprop) + # Validate JSON schema try: jam = track_test.to_jams() @@ -278,6 +291,46 @@ def test_track(): dataset.track("~faketrackid~?!") +# This tests the case where there is no data in data_home. +# It makes sure that the track can be initialized and the +# attributes accessed, but that anything requiring data +# files errors (all properties and cached properties). +def test_track_placeholder_case(): + data_home_dir = "not/a/real/path" + + for dataset_name in DATASETS: + data_home = os.path.join(data_home_dir, dataset_name) + + module = importlib.import_module("mirdata.datasets.{}".format(dataset_name)) + dataset = module.Dataset(os.path.join(data_home, dataset_name)) + + if dataset._track_class is None or dataset.remote_index: + continue + + if dataset_name in CUSTOM_TEST_TRACKS: + trackid = CUSTOM_TEST_TRACKS[dataset_name] + else: + trackid = dataset.track_ids[0] + + try: + track_test = dataset.track(trackid) + except: + assert False, "{}: {}".format(dataset_name, sys.exc_info()[0]) + + track_data = get_attributes_and_properties(track_test) + + for attr in track_data["attributes"]: + ret = getattr(track_test, attr) + + for prop in track_data["properties"]: + with pytest.raises(Exception): + ret = getattr(track_test, prop) + + for cprop in track_data["cached_properties"]: + with pytest.raises(Exception): + ret = getattr(track_test, cprop) + + # for load_* functions which require more than one argument # module_name : {function_name: {parameter2: value, parameter3: value}} EXCEPTIONS = { diff --git a/tests/test_maestro.py b/tests/test_maestro.py index aaf6abeba..f4feb3604 100644 --- a/tests/test_maestro.py +++ b/tests/test_maestro.py @@ -33,6 +33,7 @@ def test_track(): expected_property_types = { "notes": annotations.NoteData, "midi": pretty_midi.PrettyMIDI, + "audio": tuple, } assert track._track_paths == { diff --git a/tests/test_medley_solos_db.py b/tests/test_medley_solos_db.py index c0baf9434..06b09d2bd 100644 --- a/tests/test_medley_solos_db.py +++ b/tests/test_medley_solos_db.py @@ -18,7 +18,7 @@ def test_track(): "subset": "validation", } - expected_property_types = {} + expected_property_types = {"audio": tuple} run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_medleydb_melody.py b/tests/test_medleydb_melody.py index 175ff02c3..93a19fc07 100644 --- a/tests/test_medleydb_melody.py +++ b/tests/test_medleydb_melody.py @@ -33,6 +33,7 @@ def test_track(): "melody1": annotations.F0Data, "melody2": annotations.F0Data, "melody3": annotations.MultiF0Data, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_medleydb_pitch.py b/tests/test_medleydb_pitch.py index 2e6bd8ef2..1ab8af471 100644 --- a/tests/test_medleydb_pitch.py +++ b/tests/test_medleydb_pitch.py @@ -23,7 +23,7 @@ def test_track(): "genre": "Singer/Songwriter", } - expected_property_types = {"pitch": annotations.F0Data} + expected_property_types = {"pitch": annotations.F0Data, "audio": tuple} run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_mridangam_stroke.py b/tests/test_mridangam_stroke.py index 1a629fb56..0709eeeca 100644 --- a/tests/test_mridangam_stroke.py +++ b/tests/test_mridangam_stroke.py @@ -18,7 +18,7 @@ def test_track(): "tonic": "B", } - run_track_tests(track, expected_attributes, {}) + run_track_tests(track, expected_attributes, {"audio": tuple}) audio, sr = track.audio assert sr == 44100 diff --git a/tests/test_orchset.py b/tests/test_orchset.py index 64e55cba9..0a623b5ea 100644 --- a/tests/test_orchset.py +++ b/tests/test_orchset.py @@ -34,7 +34,11 @@ def test_track(): "only_brass": False, } - expected_property_types = {"melody": annotations.F0Data} + expected_property_types = { + "melody": annotations.F0Data, + "audio_mono": tuple, + "audio_stereo": tuple, + } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_rwc_classical.py b/tests/test_rwc_classical.py index 7f46fed97..cbfe08e8a 100644 --- a/tests/test_rwc_classical.py +++ b/tests/test_rwc_classical.py @@ -32,6 +32,7 @@ def test_track(): expected_property_types = { "beats": annotations.BeatData, "sections": annotations.SectionData, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_rwc_jazz.py b/tests/test_rwc_jazz.py index 385a24bc7..b74ebf3dd 100644 --- a/tests/test_rwc_jazz.py +++ b/tests/test_rwc_jazz.py @@ -31,6 +31,7 @@ def test_track(): expected_property_types = { "beats": annotations.BeatData, "sections": annotations.SectionData, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_rwc_popular.py b/tests/test_rwc_popular.py index 56e9ffe7f..9043a4f90 100644 --- a/tests/test_rwc_popular.py +++ b/tests/test_rwc_popular.py @@ -41,6 +41,7 @@ def test_track(): "sections": annotations.SectionData, "chords": annotations.ChordData, "vocal_instrument_activity": annotations.EventData, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_salami.py b/tests/test_salami.py index 9d0fe7b39..d6e0ab5cc 100644 --- a/tests/test_salami.py +++ b/tests/test_salami.py @@ -39,6 +39,7 @@ def test_track(): "sections_annotator_1_lowercase": annotations.SectionData, "sections_annotator_2_uppercase": annotations.SectionData, "sections_annotator_2_lowercase": annotations.SectionData, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_saraga_carnatic.py b/tests/test_saraga_carnatic.py index dde665eb1..c78030429 100644 --- a/tests/test_saraga_carnatic.py +++ b/tests/test_saraga_carnatic.py @@ -58,13 +58,6 @@ def test_track(): } expected_property_types = { - "audio-mix": (np.ndarray, float), - "audio-ghatam": (np.ndarray, float), - "audio-mridangam-left": (np.ndarray, float), - "audio-mridangam-right": (np.ndarray, float), - "audio-violin": (np.ndarray, float), - "audio-vocal-s": (np.ndarray, float), - "audio-vocal": (np.ndarray, float), "tempo": dict, "phrases": annotations.EventData, "pitch": annotations.F0Data, @@ -73,6 +66,7 @@ def test_track(): "sections": annotations.SectionData, "tonic": float, "metadata": dict, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_saraga_hindustani.py b/tests/test_saraga_hindustani.py index 00733268a..b89a1111b 100644 --- a/tests/test_saraga_hindustani.py +++ b/tests/test_saraga_hindustani.py @@ -33,7 +33,6 @@ def test_track(): } expected_property_types = { - "audio": (np.ndarray, float), "tempo": dict, "phrases": annotations.EventData, "pitch": annotations.F0Data, @@ -41,6 +40,7 @@ def test_track(): "sections": annotations.SectionData, "tonic": float, "metadata": dict, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_tinysol.py b/tests/test_tinysol.py index 40ec960b9..38cc00b1f 100644 --- a/tests/test_tinysol.py +++ b/tests/test_tinysol.py @@ -29,7 +29,9 @@ def test_track(): "string_id": None, } - expected_property_types = {} + expected_property_types = { + "audio": tuple, + } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_tonality_classicaldb.py b/tests/test_tonality_classicaldb.py index 347801897..5eea0204a 100644 --- a/tests/test_tonality_classicaldb.py +++ b/tests/test_tonality_classicaldb.py @@ -28,6 +28,7 @@ def test_track(): "spectrum": np.ndarray, "hpcp": np.ndarray, "musicbrainz_metadata": dict, + "audio": tuple, } run_track_tests(track, expected_attributes, expected_property_types) diff --git a/tests/test_utils.py b/tests/test_utils.py index 77a829d3b..e1053b423 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -21,9 +21,16 @@ def run_track_tests(track, expected_attributes, expected_property_types): assert expected_attributes[attr] == getattr(track, attr) # test track property types - for prop in track_attr["cached_properties"]: + for prop in track_attr["cached_properties"] + track_attr["properties"]: print("{}: {}".format(prop, type(getattr(track, prop)))) - assert isinstance(getattr(track, prop), expected_property_types[prop]) + if prop in expected_property_types: + assert isinstance(getattr(track, prop), expected_property_types[prop]) + elif prop in expected_attributes: + assert expected_attributes[prop] == getattr(track, prop) + else: + assert ( + False + ), "{} not in expected_property_types or expected_attributes".format(prop) def get_attributes_and_properties(class_instance):