From 68b135aeeb09ec3904c9f64c102472c239c57924 Mon Sep 17 00:00:00 2001
From: Chris Robinson <chris.kcat@gmail.com>
Date: Sun, 24 Mar 2024 02:04:54 -0700
Subject: [PATCH] Simplify calculating the 90-degree phase shift FIR filter

---
 common/phase_shifter.h | 60 +++++++++++++-----------------------------
 core/uhjfilter.cpp     | 56 ++++++++++++++++++---------------------
 2 files changed, 43 insertions(+), 73 deletions(-)
diff --git a/common/phase_shifter.h b/common/phase_shifter.h
index f399f5db7..a2f0d2b82 100644
--- a/common/phase_shifter.h
+++ b/common/phase_shifter.h
@@ -8,12 +8,10 @@
 #endif
 
 #include <array>
-#include <complex>
+#include <cmath>
 #include <cstddef>
-#include <limits>
-#include <vector>
 
-#include "alcomplex.h"
+#include "alnumbers.h"
 #include "alspan.h"
 
 
@@ -30,47 +28,25 @@ struct PhaseShifterT {
 
     alignas(16) std::array<float,FilterSize/2> mCoeffs{};
 
-    /* Some notes on this filter construction.
-     *
-     * A wide-band phase-shift filter needs a delay to maintain linearity. A
-     * dirac impulse in the center of a time-domain buffer represents a filter
-     * passing all frequencies through as-is with a pure delay. Converting that
-     * to the frequency domain, adjusting the phase of each frequency bin by
-     * +90 degrees, then converting back to the time domain, results in a FIR
-     * filter that applies a +90 degree wide-band phase-shift.
-     *
-     * A particularly notable aspect of the time-domain filter response is that
-     * every other coefficient is 0. This allows doubling the effective size of
-     * the filter, by storing only the non-0 coefficients and double-stepping
-     * over the input to apply it.
-     *
-     * Additionally, the resulting filter is independent of the sample rate.
-     * The same filter can be applied regardless of the device's sample rate
-     * and achieve the same effect.
-     */
     PhaseShifterT()
     {
-        using complex_d = std::complex<double>;
-        constexpr std::size_t fft_size{FilterSize};
-        constexpr std::size_t half_size{fft_size / 2};
-
-        auto fftBuffer = std::vector<complex_d>(fft_size, complex_d{});
-        fftBuffer[half_size] = 1.0;
-
-        forward_fft(al::span{fftBuffer});
-        fftBuffer[0] *= std::numeric_limits<double>::epsilon();
-        for(std::size_t i{1};i < half_size;++i)
-            fftBuffer[i] = complex_d{-fftBuffer[i].imag(), fftBuffer[i].real()};
-        fftBuffer[half_size] *= std::numeric_limits<double>::epsilon();
-        for(std::size_t i{half_size+1};i < fft_size;++i)
-            fftBuffer[i] = std::conj(fftBuffer[fft_size - i]);
-        inverse_fft(al::span{fftBuffer});
-
-        auto fftiter = fftBuffer.begin() + fft_size - 1;
-        for(float &coeff : mCoeffs)
+        /* Every other coefficient is 0, so we only need to calculate and store
+         * the non-0 terms and double-step over the input to apply it. The
+         * calculated coefficients are in reverse to make applying in the time-
+         * domain more efficient.
+         */
+        for(std::size_t i{0};i < FilterSize/2;++i)
         {
-            coeff = static_cast<float>(fftiter->real() / double{fft_size});
-            fftiter -= 2;
+            const int k{static_cast<int>(i*2 + 1) - int{FilterSize/2}};
+
+            /* Calculate the Blackman window value for this coefficient. */
+            const double w{2.0*al::numbers::pi * static_cast<double>(i*2 + 1)
+                / double{FilterSize}};
+            const double window{0.3635819 - 0.4891775*std::cos(w) + 0.1365995*std::cos(2.0*w)
+                - 0.0106411*std::cos(3.0*w)};
+
+            const double pk{al::numbers::pi * static_cast<double>(k)};
+            mCoeffs[i] = static_cast<float>(window * (1.0-std::cos(pk)) / pk);
         }
     }
 
diff --git a/core/uhjfilter.cpp b/core/uhjfilter.cpp
index 9b6b526a9..2823a024b 100644
--- a/core/uhjfilter.cpp
+++ b/core/uhjfilter.cpp
@@ -58,53 +58,47 @@ struct SegmentedFilter {
 
     SegmentedFilter() : mFft{sFftLength, PFFFT_REAL}
     {
-        using complex_d = std::complex<double>;
-        constexpr size_t fft_size{N};
-        constexpr size_t half_size{fft_size / 2};
+        static constexpr size_t fft_size{N};
 
-        /* To set up the filter, we need to generate the desired response.
-         * Start with a pure delay that passes all frequencies through.
-         */
-        auto fftBuffer = std::vector<complex_d>(fft_size, complex_d{});
-        fftBuffer[half_size] = 1.0;
-
-        /* Convert to the frequency domain, shift the phase of each bin by +90
-         * degrees, then convert back to the time domain.
-         *
-         * NOTE: The 0- and half-frequency are always real for a real signal.
-         * To maintain that and their phase (0 or pi), they're heavily
-         * attenuated instead of shifted like the others.
+        /* To set up the filter, we first need to generate the desired
+         * response (not reversed).
          */
-        forward_fft(al::span{fftBuffer});
-        fftBuffer[0] *= std::numeric_limits<double>::epsilon();
-        for(size_t i{1};i < half_size;++i)
-            fftBuffer[i] = complex_d{-fftBuffer[i].imag(), fftBuffer[i].real()};
-        fftBuffer[half_size] *= std::numeric_limits<double>::epsilon();
-        for(size_t i{half_size+1};i < fft_size;++i)
-            fftBuffer[i] = std::conj(fftBuffer[fft_size - i]);
-        inverse_fft(al::span{fftBuffer});
+        auto tmpBuffer = std::vector<double>(fft_size, 0.0);
+        for(std::size_t i{0};i < fft_size/2;++i)
+        {
+            const int k{int{fft_size/2} - static_cast<int>(i*2 + 1)};
+
+            const double w{2.0*al::numbers::pi * static_cast<double>(i*2 + 1)
+                / double{fft_size}};
+            const double window{0.3635819 - 0.4891775*std::cos(w) + 0.1365995*std::cos(2.0*w)
+                - 0.0106411*std::cos(3.0*w)};
+
+            const double pk{al::numbers::pi * static_cast<double>(k)};
+            tmpBuffer[i*2 + 1] = window * (1.0-std::cos(pk)) / pk;
+        }
 
         /* The segments of the filter are converted back to the frequency
          * domain, each on their own (0 stuffed).
          */
-        auto fftBuffer2 = std::vector<complex_d>(sFftLength);
+        using complex_d = std::complex<double>;
+        auto fftBuffer = std::vector<complex_d>(sFftLength);
         auto fftTmp = al::vector<float,16>(sFftLength);
         auto filter = mFilterData.begin();
         for(size_t s{0};s < sNumSegments;++s)
         {
-            for(size_t i{0};i < sSampleLength;++i)
-                fftBuffer2[i] = fftBuffer[sSampleLength*s + i].real() / double{fft_size};
-            std::fill_n(fftBuffer2.begin()+sSampleLength, sSampleLength, complex_d{});
-            forward_fft(al::span{fftBuffer2});
+            const auto tmpspan = al::span{tmpBuffer}.subspan(sSampleLength*s, sSampleLength);
+            auto iter = std::copy_n(tmpspan.cbegin(), tmpspan.size(), fftBuffer.begin());
+            std::fill(iter, fftBuffer.end(), complex_d{});
+            forward_fft(fftBuffer);
 
             /* Convert to zdomain data for PFFFT, scaled by the FFT length so
              * the iFFT result will be normalized.
              */
             for(size_t i{0};i < sSampleLength;++i)
             {
-                fftTmp[i*2 + 0] = static_cast<float>(fftBuffer2[i].real()) / float{sFftLength};
-                fftTmp[i*2 + 1] = static_cast<float>((i == 0) ? fftBuffer2[sSampleLength].real()
-                    : fftBuffer2[i].imag()) / float{sFftLength};
+                fftTmp[i*2 + 0] = static_cast<float>(fftBuffer[i].real()) / float{sFftLength};
+                fftTmp[i*2 + 1] = static_cast<float>((i == 0) ? fftBuffer[sSampleLength].real()
+                    : fftBuffer[i].imag()) / float{sFftLength};
             }
             mFft.zreorder(fftTmp.data(), al::to_address(filter), PFFFT_BACKWARD);
             filter += sFftLength;