Upkeep/christmas updates: bugfixes and more (#87)

* Fix PYIN bugs and failures * Switch to CMake * Fix power-of-two FFT/autocorrelation with r2c/c2r transforms * Remove SWIPE algorithm * Improve sine waves and add many more unit tests
sevagh · Dec 27, 2023 · faa04bb · faa04bb
1 parent 4d94d7e
commit faa04bb
Show file tree

Hide file tree

Showing 55 changed files with 1,356 additions and 976 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -1,9 +1,9 @@
 *
 !src
-!lib
+!cmake
 !misc
 !include
 !test
 !degraded_audio_tests
 !wav_analyzer
-!Makefile
+!CMakeLists.txt
diff --git a/.gitattributes b/.gitattributes
@@ -1,5 +1,5 @@
 *.png filter=lfs diff=lfs merge=lfs -text
-*.txt filter=lfs diff=lfs merge=lfs -text
+misc/samples/*.txt filter=lfs diff=lfs merge=lfs -text
 misc/* linguist-documentation
 *.wav filter=lfs diff=lfs merge=lfs -text
 *.aiff filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,2 @@
-lib
 .dir-locals.el
+/build
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,84 @@
+cmake_minimum_required(VERSION 3.10)
+project(pitch_detection VERSION 1.0 LANGUAGES CXX)
+
+# Make the find modules bundled in ./cmake visible to find_package().
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+
+# Build everything as C++17 and fail at configure time if it is unavailable.
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED True)
+
+# use this for dependency graph generation
+#set_property(GLOBAL PROPERTY GRAPHVIZ_EXPORT_TARGETS TRUE)
+
+# Tuning Release builds for the build machine's CPU produces binaries that may
+# not run on other machines (e.g. a Docker image moved to another host), so it
+# can be switched off with -DPITCH_DETECTION_NATIVE_ARCH=OFF.
+option(PITCH_DETECTION_NATIVE_ARCH
+    "Add -march=native to Release builds" ON)
+
+# Append rather than overwrite, so flags supplied by the user or a toolchain
+# file (CXXFLAGS, -DCMAKE_CXX_FLAGS=...) are preserved. -ansi is deliberately
+# not passed: in C++ mode it requests C++98, which contradicts the C++17
+# requirement above.
+string(APPEND CMAKE_CXX_FLAGS
+    " -Wall -Wextra -pedantic -fext-numeric-literals -fopenmp")
+set(CMAKE_CXX_FLAGS_DEBUG "-g")
+set(CMAKE_CXX_FLAGS_RELEASE "-O3 -flto")
+if(PITCH_DETECTION_NATIVE_ARCH)
+    string(APPEND CMAKE_CXX_FLAGS_RELEASE " -march=native")
+endif()
+
+# Header search paths shared by every target defined below.
+include_directories(include)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/test)
+
+# Hard requirements of the library itself; configuration stops if either is
+# missing.
+find_package(mlpack REQUIRED)
+find_package(FFTS REQUIRED)
+
+# Library target: every .cpp file under src/. The list is collected at
+# configure time, so re-run CMake after adding or removing a source file.
+file(GLOB_RECURSE LIB_SOURCES "src/*.cpp")
+add_library(pitch_detection SHARED ${LIB_SOURCES})
+
+# Put the directory where the FFTS header was found on the include path, so
+# an FFTS installed outside the compiler's default search path still compiles.
+# NOTE(review): PUBLIC assumes the installed public header may include FFTS
+# headers - confirm, and narrow to PRIVATE if it does not.
+target_include_directories(pitch_detection PUBLIC
+    ${FFTS_INCLUDE_DIRS})
+
+target_link_libraries(pitch_detection PUBLIC
+    ${MLPACK_LIBRARIES}
+    ${FFTS_LIBRARIES})
+
+# Optional dependencies of the wav_analyzer example program. gflags and
+# libnyquist are looked up as CMake packages, opus and wavpack via pkg-config.
+find_package(gflags QUIET)
+find_package(libnyquist QUIET)
+find_package(PkgConfig QUIET)
+pkg_search_module(OPUS QUIET opus)
+pkg_search_module(WAVPACK QUIET wavpack)
+
+# wav_analyzer is only built when every one of its dependencies was found.
+if(gflags_FOUND AND OPUS_FOUND AND WAVPACK_FOUND AND libnyquist_FOUND)
+    # link_directories() only applies to targets created after it is called,
+    # so it has to come before add_executable().
+    link_directories(
+        ${GFLAGS_LIBRARY_DIRS}
+        ${OPUS_LIBRARY_DIRS}
+        ${WAVPACK_LIBRARY_DIRS}
+        ${LIBNYQUIST_LIBRARY_DIRS})
+
+    file(GLOB WAV_ANALYZER_SOURCES "wav_analyzer/*.cpp")
+    add_executable(wav_analyzer ${WAV_ANALYZER_SOURCES})
+    target_link_libraries(wav_analyzer PRIVATE
+        pitch_detection
+        ${GFLAGS_LIBRARIES}
+        ${OPUS_LIBRARIES}
+        ${WAVPACK_LIBRARIES}
+        libnyquist)
+
+    target_include_directories(wav_analyzer PRIVATE
+        ${GFLAGS_INCLUDE_DIRS}
+        ${OPUS_INCLUDE_DIRS}
+        ${WAVPACK_INCLUDE_DIRS}
+        ${LIBNYQUIST_INCLUDE_DIRS})
+endif()
+
+include(CTest)
+
+# The unit tests are optional: they are only built when GoogleTest is found.
+find_package(GTest QUIET)
+if(GTEST_FOUND)
+    enable_testing()
+    file(GLOB TEST_SOURCES "test/test*.cpp" "test/util.cpp")
+    add_executable(pitch_tests ${TEST_SOURCES})
+    target_link_libraries(pitch_tests PRIVATE
+        pitch_detection
+        GTest::GTest
+        GTest::Main)
+
+    # Register the binary with CTest so that `ctest` actually runs it.
+    # NOTE(review): the working directory is the source root, matching the
+    # documented `./build/pitch_tests` invocation - confirm against the
+    # relative paths the tests open.
+    add_test(NAME pitch_tests
+        COMMAND pitch_tests
+        WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
+endif()
+
+# Optional benchmark executable; skipped when the `benchmark` package is not
+# installed.
+find_package(benchmark QUIET)
+if(benchmark_FOUND)
+    file(GLOB BENCH_SOURCES
+        "test/bench.cpp"
+        "test/util.cpp")
+    add_executable(pitch_bench ${BENCH_SOURCES})
+    target_link_libraries(pitch_bench
+        PRIVATE pitch_detection benchmark::benchmark)
+endif()
+
+# Install the shared library under <prefix>/lib and the public header under
+# <prefix>/include.
+install(
+    TARGETS pitch_detection
+    LIBRARY DESTINATION lib)
+install(
+    FILES include/pitch_detection.h
+    DESTINATION include)
diff --git a/Dockerfile b/Dockerfile
@@ -11,8 +11,6 @@ git \
 cmake \
 gcc \
 g++ \
-libblas-dev \
-liblapack-dev \
 libboost-dev \
 libarmadillo-dev \
 libmlpack-dev \
@@ -29,6 +27,6 @@ RUN cd /usr/src \
 && make install
 
 # Build and install the pitch-detection library, as well as the tests and benchmarks
-RUN cd /usr/src/pitch-detection && make clean all && make -C test clean all && make install
+# Configure, build, then install: the install step puts the library and its
+# header under the default prefix, as the previous `make install` did.
+RUN cd /usr/src/pitch-detection && cmake -S . -B build -DCMAKE_BUILD_TYPE=Release && cmake --build "build" && cmake --build "build" --target install
 
 LABEL Name=pitch-detection Version=0.0.1
diff --git a/Makefile b/Makefile
diff --git a/README.md b/README.md
@@ -1,94 +1,123 @@
-### Pitch detection algorithms
+# pitch-detection
 
 Autocorrelation-based C++ pitch detection algorithms with **O(nlogn) or lower** running time:
 
 * McLeod pitch method - [2005 paper](http://miracle.otago.ac.nz/tartini/papers/A_Smarter_Way_to_Find_Pitch.pdf) - [visualization](./misc/mcleod)
 * YIN(-FFT) - [2002 paper](http://audition.ens.fr/adc/pdf/2002_JASA_YIN.pdf) - [visualization](./misc/yin)
 * Probabilistic YIN - [2014 paper](https://www.eecs.qmul.ac.uk/~simond/pub/2014/MauchDixon-PYIN-ICASSP2014.pdf)
 * Probabilistic MPM - [my own invention](./misc/probabilistic-mcleod)
-* SWIPE' - [2007 paper](https://pdfs.semanticscholar.org/0fd2/6e267cfa9b6d519967ea00db4ffeac272777.pdf) - [transliterated to C++ from kylebgorman's C implementation](https://github.com/kylebgorman/swipe)\*, \*\*
 
-\*: SWIPE' appears to be O(n) but with an enormous constant factor. The implementation complexity is much higher than MPM and YIN and it brings in additional dependencies (BLAS + LAPACK).
+The size of the FFT used is the same as the size of the input waveform, such that the output is a single pitch for the entire waveform.
 
-\*\*: There's a parallel version of SWIPE, [Aud-SWIPE-P](https://github.com/saul-calderonramirez/Aud-SWIPE-P).
+Librosa (among other libraries) uses the STFT to create _frames_ of the input waveform, and applies pitch tracking to each frame with a fixed FFT size (typically 2048 or some other power of two). If you want to track the temporal evolution of pitches in sub-sections of the waveform, you have to handle the waveform splitting yourself (look at [wav_analyzer](./wav_analyzer/wav_analyzer.cpp) for more details).
 
-Suggested usage of this library can be seen in the utility [wav_analyzer](./wav_analyzer), which divides a wav file into chunks of 0.01s and checks the pitch of each chunk. Sample output of wav_analyzer:
+## :postal_horn: Latest news :newspaper: 
+
+Dec 27, 2023 :santa: release:
+* Removed SWIPE' algorithm
+    * It is not based on autocorrelation, I skipped it in all of the tests, and my implementation was basically copy-pasted from [kylebgorman/swipe](https://github.com/kylebgorman/swipe): just use their code instead!
+* Fix autocorrelation (in YIN and MPM) for power-of-two sizes in FFTS (see [ffts issue #65](https://github.com/anthonix/ffts/issues/65)) by using r2c/c2r transforms (addresses [bug #72](https://github.com/sevagh/pitch-detection/issues/72) reported by jeychenne)
+* Fix PYIN bugs to pass all test cases (addresses jansommer's comments in [pull-request #84](https://github.com/sevagh/pitch-detection/pull/84#issuecomment-1843623594))
+* Added many more unit tests, all passing (228/228)
+
+## Other programming languages
+
+* Go: [Go implementation of YIN](./misc/yin) in this repo (for tutorial purposes)
+* Rust: [Rust implementation of MPM](./misc/mcleod) in this repo (for tutorial purposes)
+* Python: [transcribe](https://github.com/sevagh/transcribe) is a Python version of MPM for a proof-of-concept of primitive pitch transcription
+* Javascript (WebAssembly): [pitchlite](https://github.com/sevagh/pitchlite) has WASM modules of MPM/YIN running at realtime speeds in the browser, and also introduces sub-chunk detection to return the overall pitch of the chunk and the temporal sub-sequence of pitches within the chunk
+
+## Usage
+
+Suggested usage of this library can be seen in the utility [wav_analyzer](./wav_analyzer), which divides a wav file into chunks of 0.01s and checks the pitch of each chunk. A minimal example of calling the library directly:
 
 ```
-At t: 0.5
-        mpm: 162.529
-        yin: 162.543
-        swipe: 162.183
-        pmpm: 162.529
-        pyin: 162.543
+std::vector<float> chunk; // chunk of audio
+
+float pitch_mpm = pitch::mpm(chunk, sample_rate);
+float pitch_yin = pitch::yin(chunk, sample_rate);
 ```
 
-### Degraded audio tests
+## Tests
 
-All testing files are [here](./degraded_audio_tests) - the progressive degradations are described by the respective numbered JSON file, generated using [audio-degradation-toolbox](https://github.com/sevagh/audio-degradation-toolbox). The original clip is a Viola playing E3 from the [University of Iowa MIS](http://theremin.music.uiowa.edu/MIS.html).
+### Unit tests
 
-The results come from parsing the output of wav_analyzer to count how many 0.1s slices of the input clip were in the ballpark of the expected value of 164.81 - I considered anything 160-169 to be acceptable:
+There are unit tests that use sinewaves (both generated with `std::sin` and with [librosa.tone](https://librosa.org/doc/main/generated/librosa.tone.html)), and instrument tests using txt files containing waveform samples from the [University of Iowa MIS](http://theremin.music.uiowa.edu/MIS.html) recordings:
+```
+$ ./build/pitch_tests
+Running main() from ./googletest/src/gtest_main.cc
+[==========] Running 228 tests from 22 test suites.
+[----------] Global test environment set-up.
+[----------] 2 tests from MpmSinewaveTestManualAllocFloat
+[ RUN      ] MpmSinewaveTestManualAllocFloat.OneAllocMultipleFreqFromFile
+[       OK ] MpmSinewaveTestManualAllocFloat.OneAllocMultipleFreqFromFile (38 ms)
+...
+[----------] 5 tests from YinInstrumentTestFloat
+...
+[ RUN      ] YinInstrumentTestFloat.Acoustic_E2_44100
+[       OK ] YinInstrumentTestFloat.Acoustic_E2_44100 (1 ms)
+[ RUN      ] YinInstrumentTestFloat.Classical_FSharp4_48000
+[       OK ] YinInstrumentTestFloat.Classical_FSharp4_48000 (58 ms)
+[----------] 5 tests from YinInstrumentTestFloat (174 ms total)
+...
+[----------] 5 tests from MpmInstrumentTestFloat
+[ RUN      ] MpmInstrumentTestFloat.Violin_A4_44100
+[       OK ] MpmInstrumentTestFloat.Violin_A4_44100 (61 ms)
+[ RUN      ] MpmInstrumentTestFloat.Piano_B4_44100
+[       OK ] MpmInstrumentTestFloat.Piano_B4_44100 (24 ms)
+
+...
+[==========] 228 tests from 22 test suites ran. (2095 ms total)
+[  PASSED  ] 228 tests.
+```
 
-| Degradation level | MPM # correct | YIN # correct | SWIPE' # correct |
-| ------------- | ------------- | ------------- | ------------- |
-| 0 | 26 | 22 | 5 |
-| 1 | 23 | 21 | 13 |
-| 2 | 19 | 21 | 9 |
-| 3 | 18 | 19 | 7 |
-| 4 | 19 | 19 | 6 |
-| 5 | 18 | 19 | 5 |
+### Degraded audio tests
 
-### Build and install
+All testing files are [here](./misc/degraded_audio_tests) - the progressive degradations are described by the respective numbered JSON file, generated using [audio-degradation-toolbox](https://github.com/sevagh/audio-degradation-toolbox). The original clip is a Viola playing E3 from the [University of Iowa MIS](http://theremin.music.uiowa.edu/MIS.html). The results come from parsing the output of wav_analyzer to count how many 0.1s slices of the input clip were in the ballpark of the expected value of 164.81 - I considered anything 160-169 to be acceptable:
 
-Using this project should be as easy as `make && sudo make install` on Linux with a modern GCC - I don't officially support other platforms.
+| Degradation level | MPM # correct | YIN # correct |
+| ------------- | ------------- | ------------- |
+| 0 | 26 | 22 |
+| 1 | 23 | 21 |
+| 2 | 19 | 21 |
+| 3 | 18 | 19 |
+| 4 | 19 | 19 |
+| 5 | 18 | 19 |
 
-This project depends on [ffts](https://github.com/anthonix/ffts), BLAS/LAPACK, and mlpack. To run the tests, you need [googletest](https://github.com/google/googletest), and run `make -C test/ && ./test/test`. To run the bench, you need [google benchmark](https://github.com/google/benchmark), and run `make -C test/ bench && ./test/bench`.
+## Build and install
 
-Build and install pitch_detection, run the tests, and build the sample application, wav_analyzer:
+You need Linux, cmake, and gcc (I don't officially support other platforms). The library depends on [ffts](https://github.com/anthonix/ffts) and [mlpack](https://www.mlpack.org/). The tests depend on [googletest](https://github.com/google/googletest), the benchmarks depend on [google benchmark](https://github.com/google/benchmark), and wav_analyzer depends on [libnyquist](https://github.com/ddiakopoulos/libnyquist), gflags, opus, and wavpack. Dependency graph:
+![dep-graph](./misc/deps.png)
 
+Build and install with cmake:
 ```bash
-# build libpitch_detection.so
-make clean all
+cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
+cmake --build "build"
 
-# build tests and benches
-make -C test clean all
+# install to your system
+cd build && make install
 
 # run tests and benches 
-./test/test
-./test/bench
-
-# install the library and headers to `/usr/local/lib` and `/usr/local/include`
-sudo make install
+./build/pitch_tests
+./build/pitch_bench
 
-# build and run C++ sample
-make -C wav_analyzer clean all
-./wav_analyzer/wav_analyzer
+# run wav_analyzer
+./build/wav_analyzer
 ```
 
-#### Docker
-
-To simplify the setup, there's a [Dockerfile](./Dockerfile) that sets up a Ubuntu container with all the dependencies for compiling the library and running the included tests and benchmarks. You can build the image or pull it from DockerHub ([esimkowitz/pitchdetection](https://hub.docker.com/repository/docker/esimkowitz/pitchdetection)):
+### Docker
 
+To simplify the setup, there's a [Dockerfile](./Dockerfile) that sets up an Ubuntu container with all the dependencies for compiling the library and running the included tests and benchmarks:
 ```bash
 # build
 $ docker build --rm --pull -f "Dockerfile" -t pitchdetection:latest "."
 $ docker run --rm --init -it pitchdetection:latest
-
-# pull
-$ docker pull esimkowitz/pitchdetection:latest
-$ docker run --rm --init -it esimkowitz/pitchdetection:latest
-```
-
-Once you're in the container, run the tests and benches:
-
-```bash
-./test/test
-./test/bench
 ```
+**n.b.** You can pull the [esimkowitz/pitchdetection](https://hub.docker.com/repository/docker/esimkowitz/pitchdetection) image from DockerHub, but I can't promise that it's up-to-date.
 
-### Usage
+## Detailed usage
 
-Read the [header](./include/pitch_detection.h) and [sample wav_analyzer](./wav_analyzer).
+Read the [header](./include/pitch_detection.h) and the example [wav_analyzer program](./wav_analyzer).
 
 The namespaces are `pitch` and `pitch_alloc`. The functions and classes are templated for `<double>` and `<float>` support.
 
@@ -103,7 +132,6 @@ double pitch_yin = pitch::yin<double>(audio_buffer, 48000);
 double pitch_mpm = pitch::mpm<double>(audio_buffer, 48000);
 double pitch_pyin = pitch::pyin<double>(audio_buffer, 48000);
 double pitch_pmpm = pitch::pmpm<double>(audio_buffer, 48000);
-double pitch_swipe = pitch::swipe<double>(audio_buffer, 48000);
 
 pitch_alloc::Mpm<double> ma(8192);
 pitch_alloc::Yin<double> ya(8192);

diff --git a/cmake/FindFFTS.cmake b/cmake/FindFFTS.cmake
@@ -0,0 +1,18 @@
+# FindFFTS.cmake
+#
+# Try to find FFTS.
+#
+# Once done, this will define
+#   FFTS_FOUND        - System has FFTS (both the library and its header)
+#   FFTS_INCLUDE_DIRS - The FFTS include directories
+#   FFTS_LIBRARIES    - The libraries needed to use FFTS
+#
+# and, when FFTS is found, the imported target
+#   FFTS::FFTS        - carries the library location and include directory
+
+# The header is searched as ffts/ffts.h, so FFTS_INCLUDE_DIR is the directory
+# that contains the ffts/ subdirectory.
+find_path(FFTS_INCLUDE_DIR NAMES ffts/ffts.h)
+find_library(FFTS_LIBRARY NAMES ffts)
+
+# Sets FFTS_FOUND and handles the QUIET / REQUIRED arguments of find_package().
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(FFTS
+  REQUIRED_VARS FFTS_LIBRARY FFTS_INCLUDE_DIR)
+
+if(FFTS_FOUND)
+  set(FFTS_INCLUDE_DIRS ${FFTS_INCLUDE_DIR})
+  set(FFTS_LIBRARIES ${FFTS_LIBRARY})
+
+  # Guarded so that including this module more than once does not redefine
+  # the target.
+  if(NOT TARGET FFTS::FFTS)
+    add_library(FFTS::FFTS UNKNOWN IMPORTED)
+    set_target_properties(FFTS::FFTS PROPERTIES
+      IMPORTED_LOCATION "${FFTS_LIBRARY}"
+      INTERFACE_INCLUDE_DIRECTORIES "${FFTS_INCLUDE_DIR}")
+  endif()
+endif()
+
+# Keep the raw search results out of the default cache view.
+mark_as_advanced(FFTS_INCLUDE_DIR FFTS_LIBRARY)