From cdc2852296e49afc90dab1da7faabe4fca1d7fe1 Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Thu, 30 Oct 2025 18:10:18 +0100 Subject: [PATCH 1/4] rm environment_type DOCKER to make DirectRunner not use docker with beam>2.67 --- basic_pitch/data/datasets/guitarset.py | 1 - basic_pitch/data/datasets/ikala.py | 1 - basic_pitch/data/datasets/maestro.py | 4 ---- basic_pitch/data/datasets/medleydb_pitch.py | 1 - basic_pitch/data/datasets/slakh.py | 1 - 5 files changed, 8 deletions(-) diff --git a/basic_pitch/data/datasets/guitarset.py b/basic_pitch/data/datasets/guitarset.py index 97bb2eb..484ad06 100644 --- a/basic_pitch/data/datasets/guitarset.py +++ b/basic_pitch/data/datasets/guitarset.py @@ -165,7 +165,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None: "save_main_session": True, "sdk_container_image": known_args.sdk_container_image, "job_endpoint": known_args.job_endpoint, - "environment_type": "DOCKER", "environment_config": known_args.sdk_container_image, } pipeline.run( diff --git a/basic_pitch/data/datasets/ikala.py b/basic_pitch/data/datasets/ikala.py index 6ed23d4..2f35cf4 100644 --- a/basic_pitch/data/datasets/ikala.py +++ b/basic_pitch/data/datasets/ikala.py @@ -165,7 +165,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None: "save_main_session": True, "sdk_container_image": known_args.sdk_container_image, "job_endpoint": known_args.job_endpoint, - "environment_type": "DOCKER", "environment_config": known_args.sdk_container_image, } input_data = create_input_data(known_args.train_percent, known_args.split_seed) diff --git a/basic_pitch/data/datasets/maestro.py b/basic_pitch/data/datasets/maestro.py index e4ab3e7..19e7226 100644 --- a/basic_pitch/data/datasets/maestro.py +++ b/basic_pitch/data/datasets/maestro.py @@ -46,8 +46,6 @@ def __init__(self, source: str) -> None: self.source = source def setup(self) -> None: - # Oddly enough we dont want to include the gcs bucket uri. 
- # Just the path within the bucket self.maestro_remote = mirdata.initialize("maestro", data_home=self.source) self.filesystem = beam.io.filesystems.FileSystems() @@ -89,8 +87,6 @@ def setup(self) -> None: import apache_beam as beam import mirdata - # Oddly enough we dont want to include the gcs bucket uri. - # Just the path within the bucket self.maestro_remote = mirdata.initialize("maestro", data_home=self.source) self.filesystem = beam.io.filesystems.FileSystems() if self.download: diff --git a/basic_pitch/data/datasets/medleydb_pitch.py b/basic_pitch/data/datasets/medleydb_pitch.py index c7083ce..dd168e0 100644 --- a/basic_pitch/data/datasets/medleydb_pitch.py +++ b/basic_pitch/data/datasets/medleydb_pitch.py @@ -164,7 +164,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None: "save_main_session": True, "sdk_container_image": known_args.sdk_container_image, "job_endpoint": known_args.job_endpoint, - "environment_type": "DOCKER", "environment_config": known_args.sdk_container_image, } pipeline.run( diff --git a/basic_pitch/data/datasets/slakh.py b/basic_pitch/data/datasets/slakh.py index 260a77b..57bbd6f 100644 --- a/basic_pitch/data/datasets/slakh.py +++ b/basic_pitch/data/datasets/slakh.py @@ -200,7 +200,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None: "save_main_session": True, "sdk_container_image": known_args.sdk_container_image, "job_endpoint": known_args.job_endpoint, - "environment_type": "DOCKER", "environment_config": known_args.sdk_container_image, } pipeline.run( From ef34f0f114ed5eccec97f1853a90999703609306 Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Thu, 30 Oct 2025 18:10:44 +0100 Subject: [PATCH 2/4] fix incorrect dont_sonify input --- basic_pitch/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basic_pitch/train.py b/basic_pitch/train.py index 4d675fb..88d6c8c 100644 --- a/basic_pitch/train.py +++ b/basic_pitch/train.py @@ -277,7 +277,7 @@ def 
console_entry_point() -> None: args.size_evaluation_callback_datasets, datasets_to_use, dataset_sampling_frequency, - args.dont_sonify, + args.no_sonify, args.no_contours, args.weighted_onset_loss, args.positive_onset_weight, From 80d4edac88c9ea0201ed61b74843a89d586686df Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Thu, 30 Oct 2025 18:11:09 +0100 Subject: [PATCH 3/4] Remove unused constant --- basic_pitch/constants.py | 1 - 1 file changed, 1 deletion(-) diff --git a/basic_pitch/constants.py b/basic_pitch/constants.py index bf4f632..c2b717a 100644 --- a/basic_pitch/constants.py +++ b/basic_pitch/constants.py @@ -23,7 +23,6 @@ SEMITONES_PER_OCTAVE = 12 # for frequency bin calculations FFT_HOP = 256 -N_FFT = 8 * FFT_HOP NOTES_BINS_PER_SEMITONE = 1 CONTOURS_BINS_PER_SEMITONE = 3 From da586a9ccf3d5e1393e3df1ebd7237337d27601b Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Thu, 30 Oct 2025 18:12:04 +0100 Subject: [PATCH 4/4] adapt readme --- basic_pitch/data/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basic_pitch/data/README.md b/basic_pitch/data/README.md index 4f1eaea..0a679fd 100644 --- a/basic_pitch/data/README.md +++ b/basic_pitch/data/README.md @@ -5,7 +5,7 @@ The code and scripts in this section deal with training basic pitch on your own. * **--runner**: The method used to run the Beam Pipeline for processing the dataset. Options include `DirectRunner`, running directly in the code process running the pipeline, `PortableRunner`, which can be used to run the pipeline in a docker container locally, and `DataflowRunner`, which can be used to run the pipeline in a docker container on Dataflow. * **--timestamped**: If passed, the dataset will be put into a timestamp directory instead of 'splits'. * **--batch-size**: Number of examples per tfrecord when partitioning the dataset. -* **--sdk_container_image**: The Docker container image used to process the data if using `PortableRunner` or `DirectRunner`. 
+* **--sdk_container_image**: The Docker container image used to process the data if using `PortableRunner`. * **--job_endpoint**: the endpoint where the job is running. It defaults to `embed` which works for `PortableRunner`. Additional arguments that work with Beam in general can be used as well, and will be passed along and used by the pipeline. If using `DataflowRunner`, you will be required to pass `--temp_location={Path to GCS Bucket}`, `--staging_location={Path to GCS Bucket}`, `--project={Name of GCS Project}` and `--region={GCS region}`.