From cdc2852296e49afc90dab1da7faabe4fca1d7fe1 Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Thu, 30 Oct 2025 18:10:18 +0100 Subject: [PATCH 1/4] rm environment_type DOCKER to make DirectRunner not use docker with beam>2.67 --- basic_pitch/data/datasets/guitarset.py | 1 - basic_pitch/data/datasets/ikala.py | 1 - basic_pitch/data/datasets/maestro.py | 4 ---- basic_pitch/data/datasets/medleydb_pitch.py | 1 - basic_pitch/data/datasets/slakh.py | 1 - 5 files changed, 8 deletions(-) diff --git a/basic_pitch/data/datasets/guitarset.py b/basic_pitch/data/datasets/guitarset.py index 97bb2eb..484ad06 100644 --- a/basic_pitch/data/datasets/guitarset.py +++ b/basic_pitch/data/datasets/guitarset.py @@ -165,7 +165,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None: "save_main_session": True, "sdk_container_image": known_args.sdk_container_image, "job_endpoint": known_args.job_endpoint, - "environment_type": "DOCKER", "environment_config": known_args.sdk_container_image, } pipeline.run( diff --git a/basic_pitch/data/datasets/ikala.py b/basic_pitch/data/datasets/ikala.py index 6ed23d4..2f35cf4 100644 --- a/basic_pitch/data/datasets/ikala.py +++ b/basic_pitch/data/datasets/ikala.py @@ -165,7 +165,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None: "save_main_session": True, "sdk_container_image": known_args.sdk_container_image, "job_endpoint": known_args.job_endpoint, - "environment_type": "DOCKER", "environment_config": known_args.sdk_container_image, } input_data = create_input_data(known_args.train_percent, known_args.split_seed) diff --git a/basic_pitch/data/datasets/maestro.py b/basic_pitch/data/datasets/maestro.py index e4ab3e7..19e7226 100644 --- a/basic_pitch/data/datasets/maestro.py +++ b/basic_pitch/data/datasets/maestro.py @@ -46,8 +46,6 @@ def __init__(self, source: str) -> None: self.source = source def setup(self) -> None: - # Oddly enough we dont want to include the gcs bucket uri. 
- # Just the path within the bucket self.maestro_remote = mirdata.initialize("maestro", data_home=self.source) self.filesystem = beam.io.filesystems.FileSystems() @@ -89,8 +87,6 @@ def setup(self) -> None: import apache_beam as beam import mirdata - # Oddly enough we dont want to include the gcs bucket uri. - # Just the path within the bucket self.maestro_remote = mirdata.initialize("maestro", data_home=self.source) self.filesystem = beam.io.filesystems.FileSystems() if self.download: diff --git a/basic_pitch/data/datasets/medleydb_pitch.py b/basic_pitch/data/datasets/medleydb_pitch.py index c7083ce..dd168e0 100644 --- a/basic_pitch/data/datasets/medleydb_pitch.py +++ b/basic_pitch/data/datasets/medleydb_pitch.py @@ -164,7 +164,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None: "save_main_session": True, "sdk_container_image": known_args.sdk_container_image, "job_endpoint": known_args.job_endpoint, - "environment_type": "DOCKER", "environment_config": known_args.sdk_container_image, } pipeline.run( diff --git a/basic_pitch/data/datasets/slakh.py b/basic_pitch/data/datasets/slakh.py index 260a77b..57bbd6f 100644 --- a/basic_pitch/data/datasets/slakh.py +++ b/basic_pitch/data/datasets/slakh.py @@ -200,7 +200,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None: "save_main_session": True, "sdk_container_image": known_args.sdk_container_image, "job_endpoint": known_args.job_endpoint, - "environment_type": "DOCKER", "environment_config": known_args.sdk_container_image, } pipeline.run( From ef34f0f114ed5eccec97f1853a90999703609306 Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Thu, 30 Oct 2025 18:10:44 +0100 Subject: [PATCH 2/4] fix incorrect dont_sonify input --- basic_pitch/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basic_pitch/train.py b/basic_pitch/train.py index 4d675fb..88d6c8c 100644 --- a/basic_pitch/train.py +++ b/basic_pitch/train.py @@ -277,7 +277,7 @@ def 
console_entry_point() -> None: args.size_evaluation_callback_datasets, datasets_to_use, dataset_sampling_frequency, - args.dont_sonify, + args.no_sonify, args.no_contours, args.weighted_onset_loss, args.positive_onset_weight, From 80d4edac88c9ea0201ed61b74843a89d586686df Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Thu, 30 Oct 2025 18:11:09 +0100 Subject: [PATCH 3/4] Remove unused constant --- basic_pitch/constants.py | 1 - 1 file changed, 1 deletion(-) diff --git a/basic_pitch/constants.py b/basic_pitch/constants.py index bf4f632..c2b717a 100644 --- a/basic_pitch/constants.py +++ b/basic_pitch/constants.py @@ -23,7 +23,6 @@ SEMITONES_PER_OCTAVE = 12 # for frequency bin calculations FFT_HOP = 256 -N_FFT = 8 * FFT_HOP NOTES_BINS_PER_SEMITONE = 1 CONTOURS_BINS_PER_SEMITONE = 3 From da586a9ccf3d5e1393e3df1ebd7237337d27601b Mon Sep 17 00:00:00 2001 From: hyperc54 Date: Thu, 30 Oct 2025 18:12:04 +0100 Subject: [PATCH 4/4] adapt readme --- basic_pitch/data/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/basic_pitch/data/README.md b/basic_pitch/data/README.md index 4f1eaea..0a679fd 100644 --- a/basic_pitch/data/README.md +++ b/basic_pitch/data/README.md @@ -5,7 +5,7 @@ The code and scripts in this section deal with training basic pitch on your own. * **--runner**: The method used to run the Beam Pipeline for processing the dataset. Options include `DirectRunner`, running directly in the code process running the pipeline, `PortableRunner`, which can be used to run the pipeline in a docker container locally, and `DataflowRunner`, which can be used to run the pipeline in a docker container on Dataflow. * **--timestamped**: If passed, the dataset will be put into a timestamp directory instead of 'splits'. * **--batch-size**: Number of examples per tfrecord when partitioning the dataset. -* **--sdk_container_image**: The Docker container image used to process the data if using `PortableRunner` or `DirectRunner`. 
+* **--sdk_container_image**: The Docker container image used to process the data if using `PortableRunner`. * **--job_endpoint**: the endpoint where the job is running. It defaults to `embed` which works for `PortableRunner`. Additional arguments that work with Beam in general can be used as well, and will be passed along and used by the pipeline. If using `DataflowRunner`, you will be required to pass `--temp_location={Path to GCS Bucket}`, `--staging_location={Path to GCS Bucket}`, `--project={Name of GCS Project}` and `--region={GCS region}`.