From e647e57b555365cffea74186c7b5e779e25f66e7 Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Fri, 31 Oct 2025 18:47:00 +0100 Subject: [PATCH 1/7] Update mirdata, add local index fixtures --- basic_pitch/data/datasets/guitarset.py | 1 + basic_pitch/data/datasets/ikala.py | 3 + basic_pitch/data/datasets/maestro.py | 21 +- basic_pitch/data/datasets/medleydb_pitch.py | 1 + basic_pitch/data/datasets/slakh.py | 1 + pyproject.toml | 4 +- tests/data/test_guitarset.py | 17 +- tests/data/test_ikala.py | 18 +- tests/data/test_maestro.py | 26 +- tests/data/test_medleydb_pitch.py | 16 +- tests/data/test_slakh.py | 15 +- .../resources/data/guitarset/dummy_index.json | 71 ++ tests/resources/data/ikala/dummy_index.json | 47 ++ tests/resources/data/maestro/dummy_index.json | 55 ++ .../data/medleydb_pitch/dummy_index.json | 39 + tests/resources/data/slakh/dummy_index.json | 686 ++++++++++++++++++ 16 files changed, 988 insertions(+), 33 deletions(-) create mode 100644 tests/resources/data/guitarset/dummy_index.json create mode 100644 tests/resources/data/ikala/dummy_index.json create mode 100644 tests/resources/data/maestro/dummy_index.json create mode 100644 tests/resources/data/medleydb_pitch/dummy_index.json create mode 100644 tests/resources/data/slakh/dummy_index.json diff --git a/basic_pitch/data/datasets/guitarset.py b/basic_pitch/data/datasets/guitarset.py index 484ad06..6d02623 100644 --- a/basic_pitch/data/datasets/guitarset.py +++ b/basic_pitch/data/datasets/guitarset.py @@ -144,6 +144,7 @@ def determine_split(index: int) -> str: return "test" guitarset = mirdata.initialize("guitarset") + guitarset.download(["index"]) track_ids = guitarset.track_ids random.shuffle(track_ids) diff --git a/basic_pitch/data/datasets/ikala.py b/basic_pitch/data/datasets/ikala.py index 2f35cf4..bba9bbe 100644 --- a/basic_pitch/data/datasets/ikala.py +++ b/basic_pitch/data/datasets/ikala.py @@ -29,6 +29,8 @@ from basic_pitch.data import commandline, pipeline +# Oct 2025: Ikala remote download is broken so we're only loading a sample +# TODO: Re-evaluate later class IkalaInvalidTracks(beam.DoFn): def process(self, element: Tuple[str, str], *args: Tuple[Any, Any], **kwargs: Dict[str, Any]) -> Any: track_id, split = element @@ -142,6 +144,7 @@ def create_input_data(train_percent: float, seed: Optional[int] = None) -> List[ random.seed(seed) ikala = mirdata.initialize("ikala") + ikala.download(["index"]) track_ids = ikala.track_ids random.shuffle(track_ids) diff --git a/basic_pitch/data/datasets/maestro.py b/basic_pitch/data/datasets/maestro.py index 19e7226..64f57df 100644 --- a/basic_pitch/data/datasets/maestro.py +++ b/basic_pitch/data/datasets/maestro.py @@ -19,7 +19,6 @@ import logging import os import sys -import tempfile import time from typing import Any, Dict, List, TextIO, Tuple @@ -164,20 +163,10 @@ def process(self, element: List[str], *args: Tuple[Any, Any], **kwargs: Dict[str return [batch] -def create_input_data(source: str) -> List[Tuple[str, str]]: - import apache_beam as beam - - filesystem = beam.io.filesystems.FileSystems() - - with tempfile.TemporaryDirectory() as tmpdir: - maestro = mirdata.initialize("maestro", data_home=tmpdir) - metadata_path = maestro._index["metadata"]["maestro-v2.0.0"][0] - with filesystem.open( - os.path.join(source, metadata_path), - ) as s, open(os.path.join(tmpdir, metadata_path), "wb") as d: - d.write(s.read()) - - return [(track_id, track.split) for track_id, track in maestro.load_tracks().items()] +def create_input_data() -> List[Tuple[str, str]]: + maestro = mirdata.initialize("maestro") + maestro.download(["metadata"]) + return [(track_id, track.split) for track_id, track in maestro.load_tracks().items()] def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None: @@ -198,7 +187,7 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None: "environment_type": "DOCKER", "environment_config": known_args.sdk_container_image, } - input_data = create_input_data(known_args.source) + input_data = create_input_data() pipeline.run( pipeline_options, pipeline_args, diff --git a/basic_pitch/data/datasets/medleydb_pitch.py b/basic_pitch/data/datasets/medleydb_pitch.py index dd168e0..eac3529 100644 --- a/basic_pitch/data/datasets/medleydb_pitch.py +++ b/basic_pitch/data/datasets/medleydb_pitch.py @@ -140,6 +140,7 @@ def create_input_data(train_percent: float, seed: Optional[int] = None) -> List[ random.seed(seed) medleydb_pitch = mirdata.initialize("medleydb_pitch") + medleydb_pitch.download(["index"]) track_ids = medleydb_pitch.track_ids random.shuffle(track_ids) diff --git a/basic_pitch/data/datasets/slakh.py b/basic_pitch/data/datasets/slakh.py index 57bbd6f..fa0b5a0 100644 --- a/basic_pitch/data/datasets/slakh.py +++ b/basic_pitch/data/datasets/slakh.py @@ -182,6 +182,7 @@ def process(self, element: List[str]) -> List[Any]: def create_input_data() -> List[Tuple[str, str]]: slakh = mirdata.initialize("slakh") + slakh.download(["index"]) return [(track_id, track.data_split) for track_id, track in slakh.load_tracks().items()] diff --git a/pyproject.toml b/pyproject.toml index 22f744b..e8dc15c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,9 +55,7 @@ bp-download = "basic_pitch.data.download:main" data = [ "basic_pitch[tf,test]", "apache_beam", - # TODO: mirdata 0.3.9 moves dataset indexes files which breaks our tests - # Adapt our codebase to release that constraint - "mirdata<=0.3.8", + "mirdata>=1.0.0", "smart_open", "sox", "ffmpeg-python" diff --git a/tests/data/test_guitarset.py b/tests/data/test_guitarset.py index d93a502..8bcfe4e 100644 --- a/tests/data/test_guitarset.py +++ b/tests/data/test_guitarset.py @@ -14,12 +14,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock import apache_beam as beam import itertools import os import pathlib import shutil - +import pytest +import json from apache_beam.testing.test_pipeline import TestPipeline from typing import List @@ -35,8 +37,17 @@ RESOURCES_PATH = pathlib.Path(__file__).parent.parent / "resources" TRACK_ID = "00_BN1-129-Eb_comp" +GUITAR_SET_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "guitarset" / "dummy_index.json")) + + +@pytest.fixture # type: ignore[misc] +def mock_guitarset_index() -> None: # type: ignore[misc] + with mock.patch("mirdata.datasets.guitarset.Dataset.download"): + with mock.patch("mirdata.datasets.guitarset.Dataset._index", new=GUITAR_SET_TEST_INDEX): + yield + -def test_guitarset_to_tf_example(tmp_path: pathlib.Path) -> None: +def test_guitarset_to_tf_example(tmp_path: pathlib.Path, mock_guitarset_index: None) -> None: mock_guitarset_home = tmp_path / "guitarset" mock_guitarset_audio = mock_guitarset_home / "audio_mono-mic" mock_guitarset_annotations = mock_guitarset_home / "annotation" @@ -91,7 +102,7 @@ def test_guitarset_invalid_tracks(tmpdir: str) -> None: assert fp.read().strip() == str(i) -def test_guitarset_create_input_data() -> None: +def test_guitarset_create_input_data(mock_guitarset_index: None) -> None: data = create_input_data(train_percent=0.33, validation_percent=0.33) data.sort(key=lambda el: el[1]) # sort by split tolerance = 0.1 diff --git a/tests/data/test_ikala.py b/tests/data/test_ikala.py index 66756cc..6466ea7 100644 --- a/tests/data/test_ikala.py +++ b/tests/data/test_ikala.py @@ -14,10 +14,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import pytest +import pathlib +from unittest import mock import apache_beam as beam import itertools import os - +import json from apache_beam.testing.test_pipeline import TestPipeline from basic_pitch.data.datasets.ikala import ( @@ -25,9 +28,18 @@ create_input_data, ) - # TODO: Create test_ikala_to_tf_example +RESOURCES_PATH = pathlib.Path(__file__).parent.parent / "resources" +IKALA_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "ikala" / "dummy_index.json")) + + +@pytest.fixture # type: ignore[misc] +def mock_ikala_index() -> None: # type: ignore[misc] + with mock.patch("mirdata.datasets.ikala.Dataset.download"): + with mock.patch("mirdata.datasets.ikala.Dataset._index", new=IKALA_TEST_INDEX): + yield + def test_ikala_invalid_tracks(tmpdir: str) -> None: split_labels = ["train", "validation"] @@ -51,7 +63,7 @@ def test_ikala_invalid_tracks(tmpdir: str) -> None: assert fp.read().strip() == str(i) -def test_ikala_create_input_data() -> None: +def test_ikala_create_input_data(mock_ikala_index: None) -> None: data = create_input_data(train_percent=0.5) data.sort(key=lambda el: el[1]) # sort by split tolerance = 0.1 diff --git a/tests/data/test_maestro.py b/tests/data/test_maestro.py index e43a131..bd76be1 100644 --- a/tests/data/test_maestro.py +++ b/tests/data/test_maestro.py @@ -16,7 +16,9 @@ # limitations under the License. import os import pathlib - +import json +import pytest +from unittest import mock from typing import List import apache_beam as beam @@ -39,8 +41,20 @@ TEST_TRACK_ID = "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_08_Track08_wav" GT_15M_TRACK_ID = "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav" +MAESTRO_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "maestro" / "dummy_index.json")) +METADATA_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "maestro" / "maestro-v2.0.0.json")) + + +@pytest.fixture # type: ignore[misc] +def mock_maestro_index() -> None: # type: ignore[misc] + index_with_metadata = MAESTRO_TEST_INDEX + index_with_metadata["metadata"] = METADATA_TEST_INDEX + with mock.patch("mirdata.datasets.maestro.Dataset.download"): + with mock.patch("mirdata.datasets.maestro.Dataset._index", new=index_with_metadata): + yield + -def test_maestro_to_tf_example(tmp_path: pathlib.Path) -> None: +def test_maestro_to_tf_example(tmp_path: pathlib.Path, mock_maestro_index: None) -> None: mock_maestro_home = tmp_path / "maestro" mock_maestro_ext = mock_maestro_home / "2004" mock_maestro_ext.mkdir(parents=True, exist_ok=True) @@ -68,7 +82,7 @@ def test_maestro_to_tf_example(tmp_path: pathlib.Path) -> None: assert len(data) != 0 -def test_maestro_invalid_tracks(tmp_path: pathlib.Path) -> None: +def test_maestro_invalid_tracks(tmp_path: pathlib.Path, mock_maestro_index: None) -> None: mock_maestro_home = tmp_path / "maestro" mock_maestro_ext = mock_maestro_home / "2004" mock_maestro_ext.mkdir(parents=True, exist_ok=True) @@ -98,7 +112,7 @@ def test_maestro_invalid_tracks(tmp_path: pathlib.Path) -> None: assert fp.read().strip() == track_id -def test_maestro_invalid_tracks_over_15_min(tmp_path: pathlib.Path) -> None: +def test_maestro_invalid_tracks_over_15_min(tmp_path: pathlib.Path, mock_maestro_index: None) -> None: """ The track id used here is a real track id in maestro, and it is part of the train split, but we mock the data so as not to store a large file in git, hence the variable name. @@ -131,13 +145,13 @@ def test_maestro_invalid_tracks_over_15_min(tmp_path: pathlib.Path) -> None: assert fp.read().strip() == "" -def test_maestro_create_input_data() -> None: +def test_maestro_create_input_data(mock_maestro_index: None) -> None: """ A commuted metadata file is included in the repo for testing. mirdata references the metadata file to populate the tracklist with metadata. Since the file is commuted to only the filenames referenced here, we only consider these when testing the metadata. """ - data = create_input_data(str(MAESTRO_TEST_DATA_PATH)) + data = create_input_data() assert len(data) test_fnames = {TRAIN_TRACK_ID, VALID_TRACK_ID, TEST_TRACK_ID, GT_15M_TRACK_ID} diff --git a/tests/data/test_medleydb_pitch.py b/tests/data/test_medleydb_pitch.py index f2c7b23..c8f685c 100644 --- a/tests/data/test_medleydb_pitch.py +++ b/tests/data/test_medleydb_pitch.py @@ -17,6 +17,10 @@ import apache_beam as beam import itertools import os +import json +import pytest +import pathlib +from unittest import mock from apache_beam.testing.test_pipeline import TestPipeline @@ -28,6 +32,16 @@ # TODO: Create test_medleydb_pitch_to_tf_example +RESOURCES_PATH = pathlib.Path(__file__).parent.parent / "resources" +MEDLEYDB_PITCH_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "medleydb_pitch" / "dummy_index.json")) + + +@pytest.fixture # type: ignore[misc] +def mock_medleydb_pitch_index() -> None: # type: ignore[misc] + with mock.patch("mirdata.datasets.medleydb_pitch.Dataset.download"): + with mock.patch("mirdata.datasets.medleydb_pitch.Dataset._index", new=MEDLEYDB_PITCH_TEST_INDEX): + yield + def test_medleydb_pitch_invalid_tracks(tmpdir: str) -> None: split_labels = ["train", "validation"] @@ -51,7 +65,7 @@ def test_medleydb_pitch_invalid_tracks(tmpdir: str) -> None: assert fp.read().strip() == str(i) -def test_medleydb_create_input_data() -> None: +def test_medleydb_create_input_data(mock_medleydb_pitch_index: None) -> None: data = create_input_data(train_percent=0.5) data.sort(key=lambda el: el[1]) # sort by split tolerance = 0.01 diff --git a/tests/data/test_slakh.py b/tests/data/test_slakh.py index 744fa7d..0adcb55 100644 --- a/tests/data/test_slakh.py +++ b/tests/data/test_slakh.py @@ -19,6 +19,9 @@ import os import pathlib import shutil +import json +import pytest +from unittest import mock from typing import List, Tuple @@ -49,6 +52,16 @@ OMITTED_DRUMS_TRACK_ID = "Track00049-S06" +SLAKH_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "slakh" / "dummy_index.json")) + + +@pytest.fixture # type: ignore[misc] +def mock_slakh_index() -> None: # type: ignore[misc] + with mock.patch("mirdata.datasets.slakh.Dataset.download"): + with mock.patch("mirdata.datasets.slakh.Dataset._index", new=SLAKH_TEST_INDEX): + yield + + # Function to generate a sine wave def create_mock_input_data(data_home: pathlib.Path, input_data: List[Tuple[str, str]]) -> None: for track_id, split in input_data: @@ -175,7 +188,7 @@ def test_slakh_invalid_tracks_drums(tmp_path: pathlib.Path) -> None: assert fp.read().strip() == "" -def test_create_input_data() -> None: +def test_create_input_data(mock_slakh_index: None) -> None: data = create_input_data() for _, group in itertools.groupby(data, lambda el: el[1]): assert len(list(group)) diff --git a/tests/resources/data/guitarset/dummy_index.json b/tests/resources/data/guitarset/dummy_index.json new file mode 100644 index 0000000..70d9a4f --- /dev/null +++ b/tests/resources/data/guitarset/dummy_index.json @@ -0,0 +1,71 @@ +{ + "version": "sample", + "tracks": { + "03_BN3-119-G_solo": { + "audio_hex_cln": [ + "audio_hex-pickup_debleeded/03_BN3-119-G_solo_hex_cln.wav", + "5b8fc14284710793d1394bd07d44b8d7" + ], + "audio_hex": [ + "audio_hex-pickup_original/03_BN3-119-G_solo_hex.wav", + "5fdfd2e246fed2263dc917ef345d31b5" + ], + "audio_mic": [ + "audio_mono-mic/03_BN3-119-G_solo_mic.wav", + "0bef5d2a8aef9e83497887696435623f" + ], + "audio_mix": [ + "audio_mono-pickup_mix/03_BN3-119-G_solo_mix.wav", + "8ec4845f03e754c04f0576b8ea51dc70" + ], + "jams": [ + "annotation/03_BN3-119-G_solo.jams", + "ae92a9c2b79e589544a36f785f12f7d2" + ] + }, + "00_BN1-129-Eb_comp": { + "audio_hex_cln": [ + "audio_hex-pickup_debleeded/00_BN1-129-Eb_comp_hex_cln.wav", + "a95114e82e661108bb53aa7f0a76453e" + ], + "audio_hex": [ + "audio_hex-pickup_original/00_BN1-129-Eb_comp_hex.wav", + "25cf3ea1d3131e1862e2b7422ededa5f" + ], + "audio_mic": [ + "audio_mono-mic/00_BN1-129-Eb_comp_mic.wav", + "90afbf41f0bea5afc6c7af0c9c7d45a9" + ], + "audio_mix": [ + "audio_mono-pickup_mix/00_BN1-129-Eb_comp_mix.wav", + "11d99f35c51d08a8169bddb90474bbd1" + ], + "jams": [ + "annotation/00_BN1-129-Eb_comp.jams", + "9f68b0708dc1afc361fb1229899b00d0" + ] + }, + "00_BN1-147-Gb_comp": { + "audio_hex_cln": [ + "audio_hex-pickup_debleeded/00_BN1-147-Gb_comp_hex_cln.wav", + "a95114e82e661108bb53aa7f0a76453e" + ], + "audio_hex": [ + "audio_hex-pickup_original/00_BN1-147-Gb_comp_hex.wav", + "25cf3ea1d3131e1862e2b7422ededa5f" + ], + "audio_mic": [ + "audio_mono-mic/00_BN1-147-Gb_comp_mic.wav", + "90afbf41f0bea5afc6c7af0c9c7d45a9" + ], + "audio_mix": [ + "audio_mono-pickup_mix/00_BN1-147-Gb_comp_mix.wav", + "11d99f35c51d08a8169bddb90474bbd1" + ], + "jams": [ + "annotation/00_BN1-147-Gb_comp.jams", + "9f68b0708dc1afc361fb1229899b00d0" + ] + } + } +} diff --git a/tests/resources/data/ikala/dummy_index.json b/tests/resources/data/ikala/dummy_index.json new file mode 100644 index 0000000..e584e3a --- /dev/null +++ b/tests/resources/data/ikala/dummy_index.json @@ -0,0 +1,47 @@ +{ + "version": "sample", + "tracks": { + "10161_chorus": { + "audio": [ + "Wavfile/10161_chorus.wav", + "278ae003cb0d323e99b9a643c0f2eeda" + ], + "pitch": [ + "PitchLabel/10161_chorus.pv", + "0d93a011a9e668fd80673049089bbb14" + ], + "lyrics": [ + "Lyrics/10161_chorus.lab", + "79bbeb72b422056fd43be4e8d63319ce" + ], + "notes_pyin": [ + "ikala-pyin-notes/10161_chorus_vamp_pyin_pyin_notes.csv", + "015856455537ab232140746f2df7b857" + ] + }, + "10161_solo": { + "audio": [ + "Wavfile/10161_solo.wav", + "278ae003cb0d323e99b9a643c0f2eeda" + ], + "pitch": [ + "PitchLabel/10161_solo.pv", + "0d93a011a9e668fd80673049089bbb14" + ], + "lyrics": [ + "Lyrics/10161_solo.lab", + "79bbeb72b422056fd43be4e8d63319ce" + ], + "notes_pyin": [ + "ikala-pyin-notes/10161_solo_vamp_pyin_pyin_notes.csv", + "015856455537ab232140746f2df7b857" + ] + } + }, + "metadata": { + "id_mapping": [ + "id_mapping.txt", + "81097b587804ce93e56c7a331ba06abc" + ] + } +} diff --git a/tests/resources/data/maestro/dummy_index.json b/tests/resources/data/maestro/dummy_index.json new file mode 100644 index 0000000..389bd03 --- /dev/null +++ b/tests/resources/data/maestro/dummy_index.json @@ -0,0 +1,55 @@ +{ + "version": "2.0.0", + "tracks": { + "2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R3_2018_wav--1": { + "midi": [ + "2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R3_2018_wav--1.midi", + "4901b1578ee4fe8c1696e02f60924949" + ], + "audio": [ + "2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R3_2018_wav--1.wav", + "1694d8431f01eeb2a18444196550b99d" + ] + }, + "2004/MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_03_Track03_wav": { + "midi": [ + "2004/MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_03_Track03_wav.midi", + "4901b1578ee4fe8c1696e02f60924949" + ], + "audio": [ + "2004/MIDI-Unprocessed_SMF_05_R1_2004_01_ORIG_MID--AUDIO_05_R1_2004_03_Track03_wav.wav", + "1694d8431f01eeb2a18444196550b99d" + ] + }, + "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav": { + "midi": [ + "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.midi", + "4901b1578ee4fe8c1696e02f60924949" + ], + "audio": [ + "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.wav", + "1694d8431f01eeb2a18444196550b99d" + ] + }, + "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_06_Track06_wav":{ + "midi": [ + "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_06_Track06_wav.midi", + "4901b1578ee4fe8c1696e02f60924949" + ], + "audio": [ + "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_06_Track06_wav.wav", + "1694d8431f01eeb2a18444196550b99d" + ] + }, + "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_08_Track08_wav": { + "midi": [ + "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_08_Track08_wav.midi", + "4901b1578ee4fe8c1696e02f60924949" + ], + "audio": [ + "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_08_Track08_wav.wav", + "1694d8431f01eeb2a18444196550b99d" + ] + } + } +} \ No newline at end of file diff --git a/tests/resources/data/medleydb_pitch/dummy_index.json b/tests/resources/data/medleydb_pitch/dummy_index.json new file mode 100644 index 0000000..58a378e --- /dev/null +++ b/tests/resources/data/medleydb_pitch/dummy_index.json @@ -0,0 +1,39 @@ +{ + "version": "sample", + "tracks": { + "AClassicEducation_NightOwl_STEM_08": { + "audio": [ + "audio/AClassicEducation_NightOwl_STEM_08.wav", + "6cfb976517cf377863ba0ef6c66c6a07" + ], + "pitch": [ + "pitch/AClassicEducation_NightOwl_STEM_08.csv", + "67009ae37766c37d3c29146bf763e06d" + ], + "notes_pyin": [ + "medleydb-pitch-pyin-notes/AClassicEducation_NightOwl_STEM_08_vamp_pyin_pyin_notes.csv", + "32df71f481f3e49342aee5083ebe9bf6" + ] + }, + "BClassicEducation_NightOwl_STEM_08": { + "audio": [ + "audio/BClassicEducation_NightOwl_STEM_08.wav", + "6cfb976517cf377863ba0ef6c66c6a07" + ], + "pitch": [ + "pitch/BClassicEducation_NightOwl_STEM_08.csv", + "67009ae37766c37d3c29146bf763e06d" + ], + "notes_pyin": [ + "medleydb-pitch-pyin-notes/BClassicEducation_NightOwl_STEM_08_vamp_pyin_pyin_notes.csv", + "32df71f481f3e49342aee5083ebe9bf6" + ] + } + }, + "metadata": { + "medleydb_pitch_metadata": [ + "medleydb_pitch_metadata.json", + "0507b05c14549beccf17ba04d02f50aa" + ] + } +} diff --git a/tests/resources/data/slakh/dummy_index.json b/tests/resources/data/slakh/dummy_index.json new file mode 100644 index 0000000..1f55484 --- /dev/null +++ b/tests/resources/data/slakh/dummy_index.json @@ -0,0 +1,686 @@ +{ + "version": "sample_2100-redux", + "tracks": { + "Track00001-S00": { + "audio": [ + "slakh2100_flac_redux/train/Track00001/stems/S00.flac", + "bb4a50848831853a086e0f6e5b595804" + ], + "midi": [ + "slakh2100_flac_redux/train/Track00001/MIDI/S00.mid", + "68f9d227a4fd70acdcd80a5bd3b69e22" + ], + "metadata": [ + "slakh2100_flac_redux/train/Track00001/metadata.yaml", + "5258ffe8376e16e5e34b71e7323c0477" + ] + }, + "Track00001-S01": { + "audio": [ + "slakh2100_flac_redux/train/Track00001/stems/S01.flac", + "4faaa87260d154c062da60612d985c21" + ], + "midi": [ + "slakh2100_flac_redux/train/Track00001/MIDI/S01.mid", + "4f7d02354ff2ff7711bfc239e51ac8ee" + ], + "metadata": [ + "slakh2100_flac_redux/train/Track00001/metadata.yaml", + "5258ffe8376e16e5e34b71e7323c0477" + ] + }, + "Track00001-S02": { + "audio": [ + "slakh2100_flac_redux/train/Track00001/stems/S02.flac", + "8986426fd0916708f21b3a6e9a600316" + ], + "midi": [ + "slakh2100_flac_redux/train/Track00001/MIDI/S02.mid", + "ebc174f4f8a1c89c2a8c9ef8b9d9e3ae" + ], + "metadata": [ + "slakh2100_flac_redux/train/Track00001/metadata.yaml", + "5258ffe8376e16e5e34b71e7323c0477" + ] + }, + "Track00001-S03": { + "audio": [ + "slakh2100_flac_redux/train/Track00001/stems/S03.flac", + "f8b0f5dde956ad0b69da420e62570f7e" + ], + "midi": [ + "slakh2100_flac_redux/train/Track00001/MIDI/S03.mid", + "4bf9331befcaa0dbb02322b9426130dd" + ], + "metadata": [ + "slakh2100_flac_redux/train/Track00001/metadata.yaml", + "5258ffe8376e16e5e34b71e7323c0477" + ] + }, + "Track00001-S04": { + "audio": [ + "slakh2100_flac_redux/train/Track00001/stems/S04.flac", + "f5e367786d5c95c69bf4341028cd43b5" + ], + "midi": [ + "slakh2100_flac_redux/train/Track00001/MIDI/S04.mid", + "31c56abf0d23cf683f09eb69bbe0b9ea" + ], + "metadata": [ + "slakh2100_flac_redux/train/Track00001/metadata.yaml", + "5258ffe8376e16e5e34b71e7323c0477" + ] + }, + "Track00001-S05": { + "audio": [ + "slakh2100_flac_redux/train/Track00001/stems/S05.flac", + "64554ff2a2076073e303b1d117beefa9" + ], + "midi": [ + "slakh2100_flac_redux/train/Track00001/MIDI/S05.mid", + "c40df0d092b1eb74d4c6279ef141c6be" + ], + "metadata": [ + "slakh2100_flac_redux/train/Track00001/metadata.yaml", + "5258ffe8376e16e5e34b71e7323c0477" + ] + }, + "Track00001-S07": { + "audio": [ + "slakh2100_flac_redux/train/Track00001/stems/S07.flac", + "2fa435c24b83ba7d6c2d823a378a6f9c" + ], + "midi": [ + "slakh2100_flac_redux/train/Track00001/MIDI/S07.mid", + "106324b42720dbd5cab2f32abb1d2c43" + ], + "metadata": [ + "slakh2100_flac_redux/train/Track00001/metadata.yaml", + "5258ffe8376e16e5e34b71e7323c0477" + ] + }, + "Track00001-S08": { + "audio": [ + "slakh2100_flac_redux/train/Track00001/stems/S08.flac", + "6bd7f7a56713976918406573416dd7d0" + ], + "midi": [ + "slakh2100_flac_redux/train/Track00001/MIDI/S08.mid", + "a9b3fc23e3237e2244217f2188c8efb7" + ], + "metadata": [ + "slakh2100_flac_redux/train/Track00001/metadata.yaml", + "5258ffe8376e16e5e34b71e7323c0477" + ] + }, + "Track00001-S09": { + "audio": [ + "slakh2100_flac_redux/train/Track00001/stems/S09.flac", + "fd911e031f60af304722cf2dc55c60a0" + ], + "midi": [ + "slakh2100_flac_redux/train/Track00001/MIDI/S09.mid", + "9acde011a3d1a48c4fd6b4c9d838bc63" + ], + "metadata": [ + "slakh2100_flac_redux/train/Track00001/metadata.yaml", + "5258ffe8376e16e5e34b71e7323c0477" + ] + }, + "Track00001-S10": { + "audio": [ + "slakh2100_flac_redux/train/Track00001/stems/S10.flac", + "d1f6f178be71688cbc7ea58e409cf34d" + ], + "midi": [ + "slakh2100_flac_redux/train/Track00001/MIDI/S10.mid", + "e97c2639abc3ef34fe18b3f979a3ca70" + ], + "metadata": [ + "slakh2100_flac_redux/train/Track00001/metadata.yaml", + "5258ffe8376e16e5e34b71e7323c0477" + ] + }, + "Track01501-S00": { + "audio": [ + "slakh2100_flac_redux/validation/Track01501/stems/S00.flac", + "e2a13a55e60f2f8be146fd8d323777b9" + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/MIDI/S00.mid", + "e942ef890a1ca4502d1118c223c31da2" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track01501-S01": { + "audio": [ + "slakh2100_flac_redux/validation/Track01501/stems/S01.flac", + "05d58c13ac3c7b170e561f7490e2a432" + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/MIDI/S01.mid", + "18140ecb668b04dc7db88bec51baf926" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track01501-S02": { + "audio": [ + "slakh2100_flac_redux/validation/Track01501/stems/S02.flac", + "cc456bcd7cbc960567747382589c1fd2" + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/MIDI/S02.mid", + "440281aa8ae6061f1bcb7293941709ef" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track01501-S03": { + "audio": [ + "slakh2100_flac_redux/validation/Track01501/stems/S03.flac", + "4c1f3fafbe0c5c5a259da499f428bb1e" + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/MIDI/S03.mid", + "726742af9a4ce30a200a6ae7fe6347ae" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track01501-S04": { + "audio": [ + "slakh2100_flac_redux/validation/Track01501/stems/S04.flac", + "e8277f268d6f4941a906a56de921e1be" + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/MIDI/S04.mid", + "49c5aa5dcd4ee3c740473d9ef13c5f6c" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track01501-S05": { + "audio": [ + "slakh2100_flac_redux/validation/Track01501/stems/S05.flac", + "09f9b917264ac77a6f130277128cc6af" + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/MIDI/S05.mid", + "0f6ba990862772bcfb4ac779d440fd5f" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track01501-S06": { + "audio": [ + "slakh2100_flac_redux/validation/Track01501/stems/S06.flac", + "d5bf86c59ad6caec76385fffd2b2c632" + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/MIDI/S06.mid", + "bfb20d1ad0b2499599496aa247f384df" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track01501-S07": { + "audio": [ + "slakh2100_flac_redux/validation/Track01501/stems/S07.flac", + "3379a0d795e7c3c6f6ccac4daceb4a0b" + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/MIDI/S07.mid", + "2d1e356ccafce5c51f92dff5549a56e5" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track01501-S08": { + "audio": [ + "slakh2100_flac_redux/validation/Track01501/stems/S08.flac", + "9a704adc323e84c1803555ff5156db57" + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/MIDI/S08.mid", + "f3e51a064a5fff6a804864da19e85a5b" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track01501-S09": { + "audio": [ + null, + null + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/MIDI/S09.mid", + "58e00e3d952d84e28f3c6e48f439f405" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track01501-S10": { + "audio": [ + "slakh2100_flac_redux/validation/Track01501/stems/S10.flac", + "06914458b7546eb56c00a98459862220" + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/MIDI/S10.mid", + "989b89f9b0a44d99f3897c53f38e5d65" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track01876-S00": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S00.flac", + "dea2299d28513ba1636f05f3d4aa7883" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S00.mid", + "01c2299f8d6713da475e227bba2e754f" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S01": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S01.flac", + "e738e61eaa6fa288feef4be4f76ca761" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S01.mid", + "f586cdc0f38cf6245938e77997019d99" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S02": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S02.flac", + "1f07610fdbb9778bb1da5ebee9676140" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S02.mid", + "3f6959a02b36e64c6fc563543235aa5f" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S03": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S03.flac", + "75177cb6043dc25a41ea00d5c5dbe0b8" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S03.mid", + "fda5264d0adadf0418b013aab4fdd93d" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S04": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S04.flac", + "165807d0ed24f736b727cf30edf784b5" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S04.mid", + "51fd931a1c96408682e064e6c696cb9e" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S05": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S05.flac", + "a9c81473f330926dce532718f0683288" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S05.mid", + "c2e670fb69390428574473cdd5ce4d15" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S06": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S06.flac", + "63aee8696a23a3e194059e49a00a6f24" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S06.mid", + "9c57e49129c53d1793758e55b41c6266" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S07": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S07.flac", + "e0c031c4f851d8fbbbfd872ec188548f" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S07.mid", + "8e9e1059edd9b42bcaeedb64b1b86d85" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S08": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S08.flac", + "68788ca6ff835e64da685d89f9d41c92" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S08.mid", + "2815f0deb431186492277e298c6a22f8" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S09": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S09.flac", + "45f1ad752622bd2a55836b810adbb0ed" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S09.mid", + "a660be39c5d28a364b1c379bc6ae9387" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S10": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S10.flac", + "b07df78cd6078f2859aa4d9295cfc887" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S10.mid", + "2dba9d94b65be375d7bdf512015d77f6" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S11": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S11.flac", + "4c0fb526728df866a0cc2ed2f1f8a42c" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S11.mid", + "3119dacbdbb3a79b2da4215afc1a00cf" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01876-S14": { + "audio": [ + "slakh2100_flac_redux/test/Track01876/stems/S14.flac", + "dd57e6105868dc8b89e379150bfe74d6" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/MIDI/S14.mid", + "43f6d70e6a501ab13fc87967871cd867" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track00049-S00": { + "audio": [ + "slakh2100_flac_redux/omitted/Track00049/stems/S00.flac", + "57c1c98b26adc1d633eb0a2d3781f6c7" + ], + "midi": [ + "slakh2100_flac_redux/omitted/Track00049/MIDI/S00.mid", + "b1f1131c20e2ea1c9191752305ae1102" + ], + "metadata": [ + "slakh2100_flac_redux/omitted/Track00049/metadata.yaml", + "d2a582e4975f515ea904a08983f9280c" + ] + }, + "Track00049-S01": { + "audio": [ + "slakh2100_flac_redux/omitted/Track00049/stems/S01.flac", + "fe9f042cfb711f9ebcedcf6fa61dd52f" + ], + "midi": [ + "slakh2100_flac_redux/omitted/Track00049/MIDI/S01.mid", + "f8f6aad753b9aac1294b932944bc1c4f" + ], + "metadata": [ + "slakh2100_flac_redux/omitted/Track00049/metadata.yaml", + "d2a582e4975f515ea904a08983f9280c" + ] + }, + "Track00049-S02": { + "audio": [ + "slakh2100_flac_redux/omitted/Track00049/stems/S02.flac", + "386bce38fdffe6dbc3ffcd05ec9ce2f8" + ], + "midi": [ + "slakh2100_flac_redux/omitted/Track00049/MIDI/S02.mid", + "5c3112d869fd8de8fcbe0949b61542eb" + ], + "metadata": [ + "slakh2100_flac_redux/omitted/Track00049/metadata.yaml", + "d2a582e4975f515ea904a08983f9280c" + ] + }, + "Track00049-S03": { + "audio": [ + "slakh2100_flac_redux/omitted/Track00049/stems/S03.flac", + "fc7f4bc8c37d0f2c61e7282998304c56" + ], + "midi": [ + "slakh2100_flac_redux/omitted/Track00049/MIDI/S03.mid", + "9d6af85c810a6da2ee6b1f04c4bd2d67" + ], + "metadata": [ + "slakh2100_flac_redux/omitted/Track00049/metadata.yaml", + "d2a582e4975f515ea904a08983f9280c" + ] + }, + "Track00049-S04": { + "audio": [ + "slakh2100_flac_redux/omitted/Track00049/stems/S04.flac", + "c157cc90912aeddfa602fe65db762182" + ], + "midi": [ + "slakh2100_flac_redux/omitted/Track00049/MIDI/S04.mid", + "124fc8c65bd2ec41cec01c23e43651e1" + ], + "metadata": [ + "slakh2100_flac_redux/omitted/Track00049/metadata.yaml", + "d2a582e4975f515ea904a08983f9280c" + ] + }, + "Track00049-S05": { + "audio": [ + "slakh2100_flac_redux/omitted/Track00049/stems/S05.flac", + "28aac9a171f344dbf778e2d90745bd06" + ], + "midi": [ + "slakh2100_flac_redux/omitted/Track00049/MIDI/S05.mid", + "0c1728d1e8f8b36a5f92db03be2b92e4" + ], + "metadata": [ + "slakh2100_flac_redux/omitted/Track00049/metadata.yaml", + "d2a582e4975f515ea904a08983f9280c" + ] + }, + "Track00049-S06": { + "audio": [ + "slakh2100_flac_redux/omitted/Track00049/stems/S06.flac", + "5fd8dab204c533881ca880091b440c34" + ], + "midi": [ + "slakh2100_flac_redux/omitted/Track00049/MIDI/S06.mid", + "eb5d018b61eb563ee810c8acfb1ffc73" + ], + "metadata": [ + "slakh2100_flac_redux/omitted/Track00049/metadata.yaml", + "d2a582e4975f515ea904a08983f9280c" + ] + } + }, + "multitracks": { + "Track00001": { + "tracks": [ + "Track00001-S00", + "Track00001-S01", + "Track00001-S02", + "Track00001-S03", + "Track00001-S04", + "Track00001-S05", + "Track00001-S07", + "Track00001-S08", + "Track00001-S09", + "Track00001-S10" + ], + "midi": [ + "slakh2100_flac_redux/train/Track00001/all_src.mid", + "1a81ae092884234f3264e2f45927f00a" + ], + "mix": [ + "slakh2100_flac_redux/train/Track00001/mix.flac", + "99a8dbff63e86e719cb3162456b0c593" + ], + "metadata": [ + "slakh2100_flac_redux/train/Track00001/metadata.yaml", + "5258ffe8376e16e5e34b71e7323c0477" + ] + }, + "Track01876": { + "tracks": [ + "Track01876-S00", + "Track01876-S01", + "Track01876-S02", + "Track01876-S03", + "Track01876-S04", + "Track01876-S05", + "Track01876-S06", + "Track01876-S07", + "Track01876-S08", + "Track01876-S09", + "Track01876-S10", + "Track01876-S11", + "Track01876-S14" + ], + "midi": [ + "slakh2100_flac_redux/test/Track01876/all_src.mid", + "0f12ae105c1dc821c287b74632748b4e" + ], + "mix": [ + "slakh2100_flac_redux/test/Track01876/mix.flac", + "daf25d7f73a61f95b557c54718803fc8" + ], + "metadata": [ + "slakh2100_flac_redux/test/Track01876/metadata.yaml", + "a1f3b78e256e5659d01277351bf1d134" + ] + }, + "Track01501": { + "tracks": [ + "Track01501-S00", + "Track01501-S01", + "Track01501-S02", + "Track01501-S03", + "Track01501-S04", + "Track01501-S05", + "Track01501-S06", + "Track01501-S07", + "Track01501-S08", + "Track01501-S09", + "Track01501-S10" + ], + "midi": [ + "slakh2100_flac_redux/validation/Track01501/all_src.mid", + "52dc589baea0c387807aa964094e0cba" + ], + "mix": [ + "slakh2100_flac_redux/validation/Track01501/mix.flac", + "08900bca2fe0af9c65700ca287b28851" + ], + "metadata": [ + "slakh2100_flac_redux/validation/Track01501/metadata.yaml", + "87aaebf369ee5ff24d5eadc5f1ae4258" + ] + }, + "Track00049": { + "tracks": [ + "Track00049-S00", + "Track00049-S01", + "Track00049-S02", + "Track00049-S03", + "Track00049-S04", + "Track00049-S05", + "Track00049-S06" + ], + "midi": [ + "slakh2100_flac_redux/omitted/Track00049/all_src.mid", + "763348b79ed855fa294a7c980392178b" + ], + "mix": [ + "slakh2100_flac_redux/omitted/Track00049/mix.flac", + "f686f679484148844023372d2ef12bab" + ], + "metadata": [ + "slakh2100_flac_redux/omitted/Track00049/metadata.yaml", + "d2a582e4975f515ea904a08983f9280c" + ] + } + } +} From 377ac09a97baa2e08312b461a2bae99d7e988903 Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Tue, 11 Nov 2025 15:28:09 +0100 Subject: [PATCH 2/7] update comment --- basic_pitch/data/datasets/ikala.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/basic_pitch/data/datasets/ikala.py b/basic_pitch/data/datasets/ikala.py index bba9bbe..130914c 100644 --- a/basic_pitch/data/datasets/ikala.py +++ b/basic_pitch/data/datasets/ikala.py @@ -29,8 +29,7 @@ from basic_pitch.data import commandline, pipeline -# Oct 2025: Ikala remote download is broken so we're only loading a sample -# TODO: Re-evaluate later +# Oct 2025: Ikala remote download is broken # TODO: Re-evaluate later class IkalaInvalidTracks(beam.DoFn): def process(self, element: Tuple[str, str], *args: Tuple[Any, Any], **kwargs: Dict[str, Any]) -> Any: track_id, split = element From 38517da0c3d95ee682baff3afc3442492022465b Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Tue, 11 Nov 2025 15:28:37 +0100 Subject: [PATCH 3/7] update comment --- basic_pitch/data/datasets/ikala.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basic_pitch/data/datasets/ikala.py b/basic_pitch/data/datasets/ikala.py index 130914c..f288c3a 100644 --- a/basic_pitch/data/datasets/ikala.py +++ b/basic_pitch/data/datasets/ikala.py @@ -29,7 +29,7 @@ from basic_pitch.data import commandline, pipeline -# Oct 2025: Ikala remote download is broken # TODO: Re-evaluate later +# Oct 2025: Ikala remote download is broken on mirdata side # TODO: Re-evaluate later class IkalaInvalidTracks(beam.DoFn): def process(self, element: Tuple[str, str], *args: Tuple[Any, Any], **kwargs: Dict[str, Any]) -> Any: track_id, split = element From 2eb34f3a800313f9bab06fceefae96e07226ba93 Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Tue, 11 Nov 2025 15:58:51 +0100 Subject: [PATCH 4/7] Move fixtures to conftest, apply to more tests --- tests/data/conftest.py | 41 +++++++++++++++++++ tests/data/test_guitarset.py | 12 ------ tests/data/test_ikala.py | 11 ----- tests/data/test_maestro.py | 15 ------- tests/data/test_medleydb_pitch.py | 14 ------- tests/data/test_slakh.py | 8 ++-- tests/data/test_tf_example_deserialization.py | 7 ++-- 7 files changed, 49 insertions(+), 59 deletions(-) create mode 100644 tests/data/conftest.py diff --git a/tests/data/conftest.py b/tests/data/conftest.py new file mode 100644 index 0000000..d5bad4d --- /dev/null +++ b/tests/data/conftest.py @@ -0,0 +1,41 @@ +import pytest +import json +import pathlib +from unittest import mock + +RESOURCES_PATH = pathlib.Path(__file__).parent.parent / "resources" +GUITAR_SET_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "guitarset" / "dummy_index.json")) +IKALA_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "ikala" / "dummy_index.json")) +MAESTRO_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "maestro" / "dummy_index.json")) +METADATA_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "maestro" / "maestro-v2.0.0.json")) +MEDLEYDB_PITCH_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "medleydb_pitch" / "dummy_index.json")) + + +@pytest.fixture # type: ignore[misc] +def mock_medleydb_pitch_index() -> None: # type: ignore[misc] + with mock.patch("mirdata.datasets.medleydb_pitch.Dataset.download"): + with mock.patch("mirdata.datasets.medleydb_pitch.Dataset._index", new=MEDLEYDB_PITCH_TEST_INDEX): + yield + + +@pytest.fixture # type: ignore[misc] +def mock_maestro_index() -> None: # type: ignore[misc] + index_with_metadata = MAESTRO_TEST_INDEX + index_with_metadata["metadata"] = METADATA_TEST_INDEX + with mock.patch("mirdata.datasets.maestro.Dataset.download"): + with mock.patch("mirdata.datasets.maestro.Dataset._index", new=index_with_metadata): + yield + + +@pytest.fixture # type: ignore[misc] +def mock_guitarset_index() -> None: # type: ignore[misc] + with mock.patch("mirdata.datasets.guitarset.Dataset.download"): + with mock.patch("mirdata.datasets.guitarset.Dataset._index", new=GUITAR_SET_TEST_INDEX): + yield + + +@pytest.fixture # type: ignore[misc] +def mock_ikala_index() -> None: # type: ignore[misc] + with mock.patch("mirdata.datasets.ikala.Dataset.download"): + with mock.patch("mirdata.datasets.ikala.Dataset._index", new=IKALA_TEST_INDEX): + yield diff --git a/tests/data/test_guitarset.py b/tests/data/test_guitarset.py index 8bcfe4e..abba1d4 100644 --- a/tests/data/test_guitarset.py +++ b/tests/data/test_guitarset.py @@ -14,14 +14,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from unittest import mock import apache_beam as beam import itertools import os import pathlib import shutil -import pytest -import json from apache_beam.testing.test_pipeline import TestPipeline from typing import List @@ -37,15 +34,6 @@ RESOURCES_PATH = pathlib.Path(__file__).parent.parent / "resources" TRACK_ID = "00_BN1-129-Eb_comp" -GUITAR_SET_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "guitarset" / "dummy_index.json")) - - -@pytest.fixture # type: ignore[misc] -def mock_guitarset_index() -> None: # type: ignore[misc] - with mock.patch("mirdata.datasets.guitarset.Dataset.download"): - with mock.patch("mirdata.datasets.guitarset.Dataset._index", new=GUITAR_SET_TEST_INDEX): - yield - def test_guitarset_to_tf_example(tmp_path: pathlib.Path, mock_guitarset_index: None) -> None: mock_guitarset_home = tmp_path / "guitarset" diff --git a/tests/data/test_ikala.py b/tests/data/test_ikala.py index 6466ea7..3aa9e17 100644 --- a/tests/data/test_ikala.py +++ b/tests/data/test_ikala.py @@ -14,13 +14,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import pytest import pathlib -from unittest import mock import apache_beam as beam import itertools import os -import json from apache_beam.testing.test_pipeline import TestPipeline from basic_pitch.data.datasets.ikala import ( @@ -31,14 +28,6 @@ # TODO: Create test_ikala_to_tf_example RESOURCES_PATH = pathlib.Path(__file__).parent.parent / "resources" -IKALA_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "ikala" / "dummy_index.json")) - - -@pytest.fixture # type: ignore[misc] -def mock_ikala_index() -> None: # type: ignore[misc] - with mock.patch("mirdata.datasets.ikala.Dataset.download"): - with mock.patch("mirdata.datasets.ikala.Dataset._index", new=IKALA_TEST_INDEX): - yield def test_ikala_invalid_tracks(tmpdir: str) -> None: diff --git a/tests/data/test_maestro.py b/tests/data/test_maestro.py index bd76be1..7435bbe 100644 --- a/tests/data/test_maestro.py +++ b/tests/data/test_maestro.py @@ -16,9 +16,6 @@ # limitations under the License. import os import pathlib -import json -import pytest -from unittest import mock from typing import List import apache_beam as beam @@ -41,18 +38,6 @@ TEST_TRACK_ID = "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_08_Track08_wav" GT_15M_TRACK_ID = "2004/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav" -MAESTRO_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "maestro" / "dummy_index.json")) -METADATA_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "maestro" / "maestro-v2.0.0.json")) - - -@pytest.fixture # type: ignore[misc] -def mock_maestro_index() -> None: # type: ignore[misc] - index_with_metadata = MAESTRO_TEST_INDEX - index_with_metadata["metadata"] = METADATA_TEST_INDEX - with mock.patch("mirdata.datasets.maestro.Dataset.download"): - with mock.patch("mirdata.datasets.maestro.Dataset._index", new=index_with_metadata): - yield - def test_maestro_to_tf_example(tmp_path: pathlib.Path, mock_maestro_index: None) -> None: mock_maestro_home = tmp_path / "maestro" diff --git a/tests/data/test_medleydb_pitch.py b/tests/data/test_medleydb_pitch.py index c8f685c..7ef77e1 100644 --- a/tests/data/test_medleydb_pitch.py +++ b/tests/data/test_medleydb_pitch.py @@ -17,10 +17,6 @@ import apache_beam as beam import itertools import os -import json -import pytest -import pathlib -from unittest import mock from apache_beam.testing.test_pipeline import TestPipeline @@ -32,16 +28,6 @@ # TODO: Create test_medleydb_pitch_to_tf_example -RESOURCES_PATH = pathlib.Path(__file__).parent.parent / "resources" -MEDLEYDB_PITCH_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "medleydb_pitch" / "dummy_index.json")) - - -@pytest.fixture # type: ignore[misc] -def mock_medleydb_pitch_index() -> None: # type: ignore[misc] - with mock.patch("mirdata.datasets.medleydb_pitch.Dataset.download"): - with mock.patch("mirdata.datasets.medleydb_pitch.Dataset._index", new=MEDLEYDB_PITCH_TEST_INDEX): - yield - def test_medleydb_pitch_invalid_tracks(tmpdir: str) -> None: split_labels = ["train", "validation"] diff --git a/tests/data/test_slakh.py b/tests/data/test_slakh.py index 0adcb55..8f3c80b 100644 --- a/tests/data/test_slakh.py +++ b/tests/data/test_slakh.py @@ -79,7 +79,7 @@ def create_mock_input_data(data_home: pathlib.Path, input_data: List[Tuple[str, shutil.copy(SLAKH_PATH / split / track_num / "metadata.yaml", track_dir / "metadata.yaml") -def test_slakh_to_tf_example(tmp_path: pathlib.Path) -> None: +def test_slakh_to_tf_example(tmp_path: pathlib.Path, mock_slakh_index: None) -> None: mock_slakh_home = tmp_path / "slakh" mock_slakh_ext = mock_slakh_home / "slakh2100_flac_redux" @@ -105,7 +105,7 @@ def test_slakh_to_tf_example(tmp_path: pathlib.Path) -> None: assert len(data) != 0 -def test_slakh_invalid_tracks(tmp_path: pathlib.Path) -> None: +def test_slakh_invalid_tracks(tmp_path: pathlib.Path, mock_slakh_index: None) -> None: mock_slakh_home = tmp_path / "slakh" mock_slakh_ext = mock_slakh_home / "slakh2100_flac_redux" @@ -132,7 +132,7 @@ def test_slakh_invalid_tracks(tmp_path: pathlib.Path) -> None: assert fp.read().strip() == track_id -def test_slakh_invalid_tracks_omitted(tmp_path: pathlib.Path) -> None: +def test_slakh_invalid_tracks_omitted(tmp_path: pathlib.Path, mock_slakh_index: None) -> None: mock_slakh_home = tmp_path / "slakh" mock_slakh_ext = mock_slakh_home / "slakh2100_flac_redux" @@ -161,7 +161,7 @@ def test_slakh_invalid_tracks_omitted(tmp_path: pathlib.Path) -> None: assert fp.read().strip() == "" -def test_slakh_invalid_tracks_drums(tmp_path: pathlib.Path) -> None: +def test_slakh_invalid_tracks_drums(tmp_path: pathlib.Path, mock_slakh_index: None) -> None: mock_slakh_home = tmp_path / "slakh" mock_slakh_ext = mock_slakh_home / "slakh2100_flac_redux" diff --git a/tests/data/test_tf_example_deserialization.py b/tests/data/test_tf_example_deserialization.py index 379f5bc..9ca3b90 100644 --- a/tests/data/test_tf_example_deserialization.py +++ b/tests/data/test_tf_example_deserialization.py @@ -86,7 +86,7 @@ def mock_and_process(split: str, track_id: str) -> None: return output_home -def test_prepare_datasets(tmp_path: pathlib.Path) -> None: +def test_prepare_datasets(tmp_path: pathlib.Path, mock_guitarset_index: None) -> None: datasets_home = setup_test_resources(tmp_path) ds_train, ds_valid = prepare_datasets( @@ -102,7 +102,7 @@ def test_prepare_datasets(tmp_path: pathlib.Path) -> None: assert ds_valid is not None and isinstance(ds_valid, tf.data.Dataset) -def test_prepare_visualization_dataset(tmp_path: pathlib.Path) -> None: +def test_prepare_visualization_dataset(tmp_path: pathlib.Path, mock_guitarset_index: None) -> None: datasets_home = setup_test_resources(tmp_path) ds_train, ds_valid = prepare_visualization_datasets( @@ -117,7 +117,7 @@ def test_prepare_visualization_dataset(tmp_path: pathlib.Path) -> None: assert ds_valid is not None and isinstance(ds_train, tf.data.Dataset) -def test_sample_datasets(tmp_path: pathlib.Path) -> None: +def test_sample_datasets(tmp_path: pathlib.Path, mock_guitarset_index: None) -> None: """touches the following methods: - transcription_dataset - parse_transcription_tfexample @@ -126,6 +126,7 @@ def test_sample_datasets(tmp_path: pathlib.Path) -> None: - reduce_transcription_inputs - get_sample_weights - _infer_time_size + - _infer_time_size - get_transcription_chunks - extract_random_window - extract_window From 49811f011f611d1105b9de06ced6f3e959be05f4 Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Tue, 11 Nov 2025 17:09:46 +0100 Subject: [PATCH 5/7] format fixture metadata --- tests/data/conftest.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/data/conftest.py b/tests/data/conftest.py index d5bad4d..608f7a3 100644 --- a/tests/data/conftest.py +++ b/tests/data/conftest.py @@ -21,10 +21,11 @@ def mock_medleydb_pitch_index() -> None: # type: ignore[misc] @pytest.fixture # type: ignore[misc] def mock_maestro_index() -> None: # type: ignore[misc] index_with_metadata = MAESTRO_TEST_INDEX - index_with_metadata["metadata"] = METADATA_TEST_INDEX + metadata = {mdata["midi_filename"].split(".")[0]: mdata for mdata in METADATA_TEST_INDEX} with mock.patch("mirdata.datasets.maestro.Dataset.download"): - with mock.patch("mirdata.datasets.maestro.Dataset._index", new=index_with_metadata): - yield + with mock.patch("mirdata.datasets.maestro.Dataset._metadata", new=metadata): + with mock.patch("mirdata.datasets.maestro.Dataset._index", new=index_with_metadata): + yield @pytest.fixture # type: ignore[misc] From e18d9b8972848eaa510a5b3569b83ad2fc886ca5 Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Tue, 11 Nov 2025 17:21:37 +0100 Subject: [PATCH 6/7] rm unused variable --- tests/data/test_ikala.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/data/test_ikala.py b/tests/data/test_ikala.py index 3aa9e17..b2d471e 100644 --- a/tests/data/test_ikala.py +++ b/tests/data/test_ikala.py @@ -14,7 +14,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import pathlib import apache_beam as beam import itertools import os @@ -27,8 +26,6 @@ # TODO: Create test_ikala_to_tf_example -RESOURCES_PATH = pathlib.Path(__file__).parent.parent / "resources" - def test_ikala_invalid_tracks(tmpdir: str) -> None: split_labels = ["train", "validation"] From e65980998bbf2e2a16597b59192d01716d30d051 Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Tue, 11 Nov 2025 17:22:39 +0100 Subject: [PATCH 7/7] also move slakh fixtures --- tests/data/conftest.py | 8 ++++++++ tests/data/test_slakh.py | 13 ------------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/tests/data/conftest.py b/tests/data/conftest.py index 608f7a3..279f481 100644 --- a/tests/data/conftest.py +++ b/tests/data/conftest.py @@ -9,6 +9,14 @@ MAESTRO_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "maestro" / "dummy_index.json")) METADATA_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "maestro" / "maestro-v2.0.0.json")) MEDLEYDB_PITCH_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "medleydb_pitch" / "dummy_index.json")) +SLAKH_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "slakh" / "dummy_index.json")) + + +@pytest.fixture # type: ignore[misc] +def mock_slakh_index() -> None: # type: ignore[misc] + with mock.patch("mirdata.datasets.slakh.Dataset.download"): + with mock.patch("mirdata.datasets.slakh.Dataset._index", new=SLAKH_TEST_INDEX): + yield @pytest.fixture # type: ignore[misc] diff --git a/tests/data/test_slakh.py b/tests/data/test_slakh.py index 8f3c80b..6884cec 100644 --- a/tests/data/test_slakh.py +++ b/tests/data/test_slakh.py @@ -19,9 +19,6 @@ import os import pathlib import shutil -import json -import pytest -from unittest import mock from typing import List, Tuple @@ -52,16 +49,6 @@ OMITTED_DRUMS_TRACK_ID = "Track00049-S06" -SLAKH_TEST_INDEX = json.load(open(RESOURCES_PATH / "data" / "slakh" / "dummy_index.json")) - - -@pytest.fixture # type: ignore[misc] -def mock_slakh_index() -> None: # type: ignore[misc] - with mock.patch("mirdata.datasets.slakh.Dataset.download"): - with mock.patch("mirdata.datasets.slakh.Dataset._index", new=SLAKH_TEST_INDEX): - yield - - # Function to generate a sine wave def create_mock_input_data(data_home: pathlib.Path, input_data: List[Tuple[str, str]]) -> None: for track_id, split in input_data: