spotify · hyperc54 · Nov 7, 2025 · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025
diff --git a/basic_pitch/constants.py b/basic_pitch/constants.py
@@ -23,7 +23,6 @@
 SEMITONES_PER_OCTAVE = 12  # for frequency bin calculations
 
 FFT_HOP = 256
-N_FFT = 8 * FFT_HOP
 
 NOTES_BINS_PER_SEMITONE = 1
 CONTOURS_BINS_PER_SEMITONE = 3

diff --git a/basic_pitch/data/README.md b/basic_pitch/data/README.md
@@ -5,7 +5,7 @@ The code and scripts in this section deal with training basic pitch on your own.
 * **--runner**: The method used to run the Beam Pipeline for processing the dataset. Options include `DirectRunner`, running directly in the code process running the pipeline, `PortableRunner`, which can be used to run the pipeline in a docker container locally, and `DataflowRunner`, which can be used to run the pipeline in a docker container on Dataflow. 
 * **--timestamped**: If passed, the dataset will be put into a timestamp directory instead of 'splits'.
 * **--batch-size**: Number of examples per tfrecord when partitioning the dataset.
-* **--sdk_container_image**: The Docker container image used to process the data if using `PortableRunner` or `DirectRunner`.
+* **--sdk_container_image**: The Docker container image used to process the data if using `PortableRunner`.
 * **--job_endpoint**: the endpoint where the job is running. It defaults to `embed` which works for `PortableRunner`. 
 
 Additional arguments that work with Beam in general can be used as well, and will be passed along and used by the pipeline. If using `DataflowRunner`, you will be required to pass `--temp_location={Path to GCS Bucket}`, `--staging_location={Path to GCS Bucket}`, `--project={Name of GCS Project}` and `--region={GCS region}`. 
diff --git a/basic_pitch/data/datasets/guitarset.py b/basic_pitch/data/datasets/guitarset.py
@@ -165,7 +165,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None:
         "save_main_session": True,
         "sdk_container_image": known_args.sdk_container_image,
         "job_endpoint": known_args.job_endpoint,
-        "environment_type": "DOCKER",
         "environment_config": known_args.sdk_container_image,
     }
     pipeline.run(

diff --git a/basic_pitch/data/datasets/ikala.py b/basic_pitch/data/datasets/ikala.py
@@ -165,7 +165,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None:
         "save_main_session": True,
         "sdk_container_image": known_args.sdk_container_image,
         "job_endpoint": known_args.job_endpoint,
-        "environment_type": "DOCKER",
         "environment_config": known_args.sdk_container_image,
     }
     input_data = create_input_data(known_args.train_percent, known_args.split_seed)

diff --git a/basic_pitch/data/datasets/maestro.py b/basic_pitch/data/datasets/maestro.py
@@ -46,8 +46,6 @@ def __init__(self, source: str) -> None:
         self.source = source
 
     def setup(self) -> None:
-        # Oddly enough we dont want to include the gcs bucket uri.
-        # Just the path within the bucket
         self.maestro_remote = mirdata.initialize("maestro", data_home=self.source)
         self.filesystem = beam.io.filesystems.FileSystems()
 
@@ -89,8 +87,6 @@ def setup(self) -> None:
         import apache_beam as beam
         import mirdata
 
-        # Oddly enough we dont want to include the gcs bucket uri.
-        # Just the path within the bucket
         self.maestro_remote = mirdata.initialize("maestro", data_home=self.source)
         self.filesystem = beam.io.filesystems.FileSystems()
         if self.download:

diff --git a/basic_pitch/data/datasets/medleydb_pitch.py b/basic_pitch/data/datasets/medleydb_pitch.py
@@ -164,7 +164,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None:
         "save_main_session": True,
         "sdk_container_image": known_args.sdk_container_image,
         "job_endpoint": known_args.job_endpoint,
-        "environment_type": "DOCKER",
         "environment_config": known_args.sdk_container_image,
     }
     pipeline.run(

diff --git a/basic_pitch/data/datasets/slakh.py b/basic_pitch/data/datasets/slakh.py
@@ -200,7 +200,6 @@ def main(known_args: argparse.Namespace, pipeline_args: List[str]) -> None:
         "save_main_session": True,
         "sdk_container_image": known_args.sdk_container_image,
         "job_endpoint": known_args.job_endpoint,
-        "environment_type": "DOCKER",
         "environment_config": known_args.sdk_container_image,
     }
     pipeline.run(

diff --git a/basic_pitch/train.py b/basic_pitch/train.py
@@ -277,7 +277,7 @@ def console_entry_point() -> None:
         args.size_evaluation_callback_datasets,
         datasets_to_use,
         dataset_sampling_frequency,
-        args.dont_sonify,
+        args.no_sonify,
         args.no_contours,
         args.weighted_onset_loss,
         args.positive_onset_weight,