addressing pr comments - undoing incorrect reformat, adding entry for…

… sox in CONTRIBUTING.md, add copyright notice to download.py, remove apache_beam from general dependencies.
spotify · May 8, 2024 · 2c3d4fd · 2c3d4fd
1 parent 3296466
commit 2c3d4fd
Show file tree

Hide file tree

Showing 8 changed files with 30 additions and 56 deletions.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -13,7 +13,7 @@ We recommend first installing the following non-python dependencies:
     - To install on Windows, run `choco install libsndfile` using [Chocolatey](https://chocolatey.org/)
     - To install on Ubuntu, run `sudo apt-get update && sudo apt-get install --no-install-recommends -y --fix-missing pkg-config libsndfile1`
 - [ffmpeg](https://ffmpeg.org/) is a complete, cross-platform solution to record, convert and stream audio in all `basic-pitch` supported formats
-- sox
+- [sox](https://sourceforge.net/projects/sox/) is a general-purpose sound processing utility used to process and transform the data for training the `basic-pitch` model.
 
 To compile a debug build of `basic-pitch` that allows using a debugger (like gdb or lldb), use the following command to build the package locally and install a symbolic link for debugging:
 ```shell

diff --git a/basic_pitch/data/download.py b/basic_pitch/data/download.py
@@ -1,3 +1,19 @@
+#!/usr/bin/env python
+# encoding: utf-8
+#
+# Copyright 2024 Spotify AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import argparse
 import logging
 import sys

diff --git a/basic_pitch/layers/nnaudio.py b/basic_pitch/layers/nnaudio.py
@@ -280,11 +280,7 @@ def __init__(self, padding: Union[int, Tuple[int]] = 1, **kwargs: Any):
         super(ReflectionPad1D, self).__init__(**kwargs)
 
     def compute_output_shape(self, s: List[int]) -> Tuple[int, int, int]:
-        return (
-            s[0],
-            s[1],
-            (s[2] + 2 * self.padding if isinstance(self.padding, int) else self.padding[0]),
-        )
+        return (s[0], s[1], s[2] + 2 * self.padding if isinstance(self.padding, int) else self.padding[0])
 
     def call(self, x: tf.Tensor) -> tf.Tensor:
         return tf.pad(x, [[0, 0], [0, 0], [self.padding, self.padding]], "REFLECT")
@@ -302,11 +298,7 @@ def __init__(self, padding: Union[int, Tuple[int]] = 1, value: int = 0, **kwargs
         super(ConstantPad1D, self).__init__(**kwargs)
 
     def compute_output_shape(self, s: List[int]) -> Tuple[int, int, int]:
-        return (
-            s[0],
-            s[1],
-            (s[2] + 2 * self.padding if isinstance(self.padding, int) else self.padding[0]),
-        )
+        return (s[0], s[1], s[2] + 2 * self.padding if isinstance(self.padding, int) else self.padding[0])
 
     def call(self, x: tf.Tensor) -> tf.Tensor:
         return tf.pad(x, [[0, 0], [0, 0], [self.padding, self.padding]], "CONSTANT", self.value)
@@ -613,12 +605,7 @@ def call(self, x: tf.Tensor) -> tf.Tensor:
         x = self.reshape_input(x)  # type: ignore
 
         if self.earlydownsample is True:
-            x = downsampling_by_n(
-                x,
-                self.early_downsample_filter,
-                self.downsample_factor,
-                self.match_torch_exactly,
-            )
+            x = downsampling_by_n(x, self.early_downsample_filter, self.downsample_factor, self.match_torch_exactly)
 
         hop = self.hop_length
 
@@ -652,10 +639,7 @@ def call(self, x: tf.Tensor) -> tf.Tensor:
         # Transpose the output to match the output of the other spectrogram layers.
         if self.output_format.lower() == "magnitude":
             # Getting CQT Amplitude
-            return tf.transpose(
-                tf.math.sqrt(tf.math.reduce_sum(tf.math.pow(CQT, 2), axis=-1)),
-                [0, 2, 1],
-            )
+            return tf.transpose(tf.math.sqrt(tf.math.reduce_sum(tf.math.pow(CQT, 2), axis=-1)), [0, 2, 1])
 
         elif self.output_format.lower() == "complex":
             return CQT

diff --git a/basic_pitch/models.py b/basic_pitch/models.py
@@ -53,10 +53,7 @@ def transcription_loss(y_true: tf.Tensor, y_pred: tf.Tensor, label_smoothing: fl
 
 
 def weighted_transcription_loss(
-    y_true: tf.Tensor,
-    y_pred: tf.Tensor,
-    label_smoothing: float,
-    positive_weight: float = 0.5,
+    y_true: tf.Tensor, y_pred: tf.Tensor, label_smoothing: float, positive_weight: float = 0.5
 ) -> tf.Tensor:
     """The transcription loss where the positive and negative true labels are balanced by a weighting factor.
 

diff --git a/basic_pitch/nn.py b/basic_pitch/nn.py
@@ -40,11 +40,7 @@ class HarmonicStacking(tf.keras.layers.Layer):
     """
 
     def __init__(
-        self,
-        bins_per_semitone: int,
-        harmonics: List[float],
-        n_output_freqs: int,
-        name: str = "harmonic_stacking",
+        self, bins_per_semitone: int, harmonics: List[float], n_output_freqs: int, name: str = "harmonic_stacking"
     ):
         """Downsample frequency by stride, upsample channels by 4."""
         super().__init__(trainable=False, name=name)

diff --git a/basic_pitch/note_creation.py b/basic_pitch/note_creation.py
@@ -110,11 +110,7 @@ def model_output_to_notes(
     )
 
 
-def sonify_midi(
-    midi: pretty_midi.PrettyMIDI,
-    save_path: Union[pathlib.Path, str],
-    sr: Optional[int] = 44100,
-) -> None:
+def sonify_midi(midi: pretty_midi.PrettyMIDI, save_path: Union[pathlib.Path, str], sr: Optional[int] = 44100) -> None:
     """Sonify a pretty_midi midi object and save to a file.
 
     Args:
@@ -127,10 +123,7 @@ def sonify_midi(
 
 
 def sonify_salience(
-    gram: np.array,
-    semitone_resolution: float,
-    save_path: Optional[str] = None,
-    thresh: float = 0.2,
+    gram: np.array, semitone_resolution: float, save_path: Optional[str] = None, thresh: float = 0.2
 ) -> Tuple[np.array, int]:
     """Sonify a salience matrix.
 
@@ -181,9 +174,7 @@ def midi_pitch_to_contour_bin(pitch_midi: int) -> np.array:
 
 
 def get_pitch_bends(
-    contours: np.ndarray,
-    note_events: List[Tuple[int, int, int, float]],
-    n_bins_tolerance: int = 25,
+    contours: np.ndarray, note_events: List[Tuple[int, int, int, float]], n_bins_tolerance: int = 25
 ) -> List[Tuple[int, int, int, float, Optional[List[int]]]]:
     """Given note events and contours, estimate pitch bends per note.
     Pitch bends are represented as a sequence of evenly spaced midi pitch bend control units.
@@ -248,13 +239,7 @@ def note_events_to_midi(
     instruments: DefaultDict[int, pretty_midi.Instrument] = defaultdict(
         lambda: pretty_midi.Instrument(program=piano_program)
     )
-    for (
-        start_time,
-        end_time,
-        note_number,
-        amplitude,
-        pitch_bend,
-    ) in note_events_with_pitch_bends:
+    for start_time, end_time, note_number, amplitude, pitch_bend in note_events_with_pitch_bends:
         instrument = instruments[note_number] if multiple_pitch_bends else instruments[0]
         note = pretty_midi.Note(
             velocity=int(np.round(127 * amplitude)),
@@ -319,10 +304,7 @@ def get_infered_onsets(onsets: np.array, frames: np.array, n_diff: int = 2) -> n
 
 
 def constrain_frequency(
-    onsets: np.array,
-    frames: np.array,
-    max_freq: Optional[float],
-    min_freq: Optional[float],
+    onsets: np.array, frames: np.array, max_freq: Optional[float], min_freq: Optional[float]
 ) -> Tuple[np.array, np.array]:
     """Zero out activations above or below the max/min frequencies
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -19,7 +19,6 @@ classifiers = [
 ]
 dependencies = [
 	"coremltools; platform_system == 'Darwin'",
-	"apache_beam",
 	"librosa>=0.8.0",
 	"mir_eval>=0.6",
 	"numpy>=1.18",

diff --git a/tests/test_inference.py b/tests/test_inference.py
@@ -59,13 +59,13 @@ def test_predict() -> None:
         "arr_0"
     ].item()
     for k in expected_model_output.keys():
-        np.testing.assert_allclose(expected_model_output[k], model_output[k], atol=1e-4, rtol=0)  # atol=1e-4, rtol=0
+        np.testing.assert_allclose(expected_model_output[k], model_output[k], atol=1e-4, rtol=0)
 
     expected_note_events = np.load(RESOURCES_PATH / "vocadito_10" / "note_events.npz", allow_pickle=True)["arr_0"]
     assert len(expected_note_events) == len(note_events)
     for expected, calculated in zip(expected_note_events, note_events):
         for i in range(len(expected)):
-            np.testing.assert_allclose(expected[i], calculated[i], atol=1e-4, rtol=0)  # atol=1e-4, rtol=0
+            np.testing.assert_allclose(expected[i], calculated[i], atol=1e-4, rtol=0)
 
 
 def test_predict_with_saves() -> None: