Skip to content

Commit

Permalink
addressing pr comments - undoing incorrect reformat, adding entry for…
Browse files Browse the repository at this point in the history
… sox in CONTRIBUTING.md, add copyright notice to download.py, remove apache_beam from general dependencies.
  • Loading branch information
bgenchel committed May 8, 2024
1 parent 3296466 commit 2c3d4fd
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 56 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ We recommend first installing the following non-python dependencies:
- To install on Windows, run `choco install libsndfile` using [Chocolatey](https://chocolatey.org/)
- To install on Ubuntu, run `sudo apt-get update && sudo apt-get install --no-install-recommends -y --fix-missing pkg-config libsndfile1`
- [ffmpeg](https://ffmpeg.org/) is a complete, cross-platform solution to record, convert and stream audio in all `basic-pitch` supported formats
- sox
- [sox](https://sourceforge.net/projects/sox/) is a general-purpose sound processing utility used to process and transform the audio data for training the `basic-pitch` model.

To compile a debug build of `basic-pitch` that allows using a debugger (like gdb or lldb), use the following command to build the package locally and install a symbolic link for debugging:
```shell
Expand Down
16 changes: 16 additions & 0 deletions basic_pitch/data/download.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
#!/usr/bin/env python
# encoding: utf-8
#
# Copyright 2024 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import logging
import sys
Expand Down
24 changes: 4 additions & 20 deletions basic_pitch/layers/nnaudio.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,11 +280,7 @@ def __init__(self, padding: Union[int, Tuple[int]] = 1, **kwargs: Any):
super(ReflectionPad1D, self).__init__(**kwargs)

def compute_output_shape(self, s: List[int]) -> Tuple[int, int, int]:
return (
s[0],
s[1],
(s[2] + 2 * self.padding if isinstance(self.padding, int) else self.padding[0]),
)
return (s[0], s[1], s[2] + 2 * self.padding if isinstance(self.padding, int) else self.padding[0])

def call(self, x: tf.Tensor) -> tf.Tensor:
return tf.pad(x, [[0, 0], [0, 0], [self.padding, self.padding]], "REFLECT")
Expand All @@ -302,11 +298,7 @@ def __init__(self, padding: Union[int, Tuple[int]] = 1, value: int = 0, **kwargs
super(ConstantPad1D, self).__init__(**kwargs)

def compute_output_shape(self, s: List[int]) -> Tuple[int, int, int]:
return (
s[0],
s[1],
(s[2] + 2 * self.padding if isinstance(self.padding, int) else self.padding[0]),
)
return (s[0], s[1], s[2] + 2 * self.padding if isinstance(self.padding, int) else self.padding[0])

def call(self, x: tf.Tensor) -> tf.Tensor:
return tf.pad(x, [[0, 0], [0, 0], [self.padding, self.padding]], "CONSTANT", self.value)
Expand Down Expand Up @@ -613,12 +605,7 @@ def call(self, x: tf.Tensor) -> tf.Tensor:
x = self.reshape_input(x) # type: ignore

if self.earlydownsample is True:
x = downsampling_by_n(
x,
self.early_downsample_filter,
self.downsample_factor,
self.match_torch_exactly,
)
x = downsampling_by_n(x, self.early_downsample_filter, self.downsample_factor, self.match_torch_exactly)

hop = self.hop_length

Expand Down Expand Up @@ -652,10 +639,7 @@ def call(self, x: tf.Tensor) -> tf.Tensor:
# Transpose the output to match the output of the other spectrogram layers.
if self.output_format.lower() == "magnitude":
# Getting CQT Amplitude
return tf.transpose(
tf.math.sqrt(tf.math.reduce_sum(tf.math.pow(CQT, 2), axis=-1)),
[0, 2, 1],
)
return tf.transpose(tf.math.sqrt(tf.math.reduce_sum(tf.math.pow(CQT, 2), axis=-1)), [0, 2, 1])

elif self.output_format.lower() == "complex":
return CQT
Expand Down
5 changes: 1 addition & 4 deletions basic_pitch/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,7 @@ def transcription_loss(y_true: tf.Tensor, y_pred: tf.Tensor, label_smoothing: fl


def weighted_transcription_loss(
y_true: tf.Tensor,
y_pred: tf.Tensor,
label_smoothing: float,
positive_weight: float = 0.5,
y_true: tf.Tensor, y_pred: tf.Tensor, label_smoothing: float, positive_weight: float = 0.5
) -> tf.Tensor:
"""The transcription loss where the positive and negative true labels are balanced by a weighting factor.
Expand Down
6 changes: 1 addition & 5 deletions basic_pitch/nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,7 @@ class HarmonicStacking(tf.keras.layers.Layer):
"""

def __init__(
self,
bins_per_semitone: int,
harmonics: List[float],
n_output_freqs: int,
name: str = "harmonic_stacking",
self, bins_per_semitone: int, harmonics: List[float], n_output_freqs: int, name: str = "harmonic_stacking"
):
"""Downsample frequency by stride, upsample channels by 4."""
super().__init__(trainable=False, name=name)
Expand Down
28 changes: 5 additions & 23 deletions basic_pitch/note_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,7 @@ def model_output_to_notes(
)


def sonify_midi(
midi: pretty_midi.PrettyMIDI,
save_path: Union[pathlib.Path, str],
sr: Optional[int] = 44100,
) -> None:
def sonify_midi(midi: pretty_midi.PrettyMIDI, save_path: Union[pathlib.Path, str], sr: Optional[int] = 44100) -> None:
"""Sonify a pretty_midi midi object and save to a file.
Args:
Expand All @@ -127,10 +123,7 @@ def sonify_midi(


def sonify_salience(
gram: np.array,
semitone_resolution: float,
save_path: Optional[str] = None,
thresh: float = 0.2,
gram: np.array, semitone_resolution: float, save_path: Optional[str] = None, thresh: float = 0.2
) -> Tuple[np.array, int]:
"""Sonify a salience matrix.
Expand Down Expand Up @@ -181,9 +174,7 @@ def midi_pitch_to_contour_bin(pitch_midi: int) -> np.array:


def get_pitch_bends(
contours: np.ndarray,
note_events: List[Tuple[int, int, int, float]],
n_bins_tolerance: int = 25,
contours: np.ndarray, note_events: List[Tuple[int, int, int, float]], n_bins_tolerance: int = 25
) -> List[Tuple[int, int, int, float, Optional[List[int]]]]:
"""Given note events and contours, estimate pitch bends per note.
Pitch bends are represented as a sequence of evenly spaced midi pitch bend control units.
Expand Down Expand Up @@ -248,13 +239,7 @@ def note_events_to_midi(
instruments: DefaultDict[int, pretty_midi.Instrument] = defaultdict(
lambda: pretty_midi.Instrument(program=piano_program)
)
for (
start_time,
end_time,
note_number,
amplitude,
pitch_bend,
) in note_events_with_pitch_bends:
for start_time, end_time, note_number, amplitude, pitch_bend in note_events_with_pitch_bends:
instrument = instruments[note_number] if multiple_pitch_bends else instruments[0]
note = pretty_midi.Note(
velocity=int(np.round(127 * amplitude)),
Expand Down Expand Up @@ -319,10 +304,7 @@ def get_infered_onsets(onsets: np.array, frames: np.array, n_diff: int = 2) -> n


def constrain_frequency(
onsets: np.array,
frames: np.array,
max_freq: Optional[float],
min_freq: Optional[float],
onsets: np.array, frames: np.array, max_freq: Optional[float], min_freq: Optional[float]
) -> Tuple[np.array, np.array]:
"""Zero out activations above or below the max/min frequencies
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ classifiers = [
]
dependencies = [
"coremltools; platform_system == 'Darwin'",
"apache_beam",
"librosa>=0.8.0",
"mir_eval>=0.6",
"numpy>=1.18",
Expand Down
4 changes: 2 additions & 2 deletions tests/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@ def test_predict() -> None:
"arr_0"
].item()
for k in expected_model_output.keys():
np.testing.assert_allclose(expected_model_output[k], model_output[k], atol=1e-4, rtol=0) # atol=1e-4, rtol=0
np.testing.assert_allclose(expected_model_output[k], model_output[k], atol=1e-4, rtol=0)

expected_note_events = np.load(RESOURCES_PATH / "vocadito_10" / "note_events.npz", allow_pickle=True)["arr_0"]
assert len(expected_note_events) == len(note_events)
for expected, calculated in zip(expected_note_events, note_events):
for i in range(len(expected)):
np.testing.assert_allclose(expected[i], calculated[i], atol=1e-4, rtol=0) # atol=1e-4, rtol=0
np.testing.assert_allclose(expected[i], calculated[i], atol=1e-4, rtol=0)


def test_predict_with_saves() -> None:
Expand Down

0 comments on commit 2c3d4fd

Please sign in to comment.