// esola.cpp
//
// Created by Arjun Variar on 13/07/20.
//
#include <memory>
#include <numeric>
#include <vector>

#include <kfr/io/audiofile.hpp>
#include <kfr/io.hpp>
#include <kfr/dsp.hpp>
using namespace kfr;
// Subtracts from every sample of `signal` the mean of the (2 * window_length + 1)-sample
// window centred on that sample and returns the result.
//
// Samples closer than `window_length` to either end have no full window; for those the
// "mean" is taken to be the sample itself, so the corresponding output is 0. A negative
// `window_length` is treated as 0.
static std::vector<double> subtract_local_mean(const std::vector<double> &signal, int window_length) {
    if (window_length < 0) {
        window_length = 0;
    }
    const int size = static_cast<int>(signal.size());
    const int span = 2 * window_length + 1;
    std::vector<double> detrended(signal.size());
    double running_sum = 0.0;
    for (int i = 0; i < size; ++i) {
        double mean_val;
        if ((i - window_length < 0) || (i + window_length >= size)) {
            // No complete window around this sample.
            mean_val = signal[i];
        } else {
            if (i == window_length) {
                // First complete window: exactly `span` samples, indices [0, 2 * window_length].
                // Reaching this branch implies size >= span, so the range is valid.
                running_sum = std::accumulate(signal.begin(), signal.begin() + span, 0.0);
            } else {
                // Slide the window one sample to the right: add the entering sample,
                // drop the one that just left.
                running_sum += signal[i + window_length] - signal[i - window_length - 1];
            }
            mean_val = running_sum / span;
        }
        detrended[i] = signal[i] - mean_val;
    }
    return detrended;
}

// Locates epoch positions in `audio` and returns them as sample indices.
//
// Processing steps:
//   1. The first difference of the signal is passed twice through the recursion
//      y[n] = x[n] + 2 * y[n - 1] - y[n - 2].
//   2. The local mean over a (2 * window_length + 1)-sample window is removed, twice in a
//      row, with window_length = int(0.015 * sample_frequency).
//   3. Every index where the result goes from negative to positive is reported.
// NOTE(review): this looks like zero-frequency-filtering epoch extraction, and the 0.015
// factor like a 15 ms half-window with sample_frequency in Hz - confirm against the
// reference algorithm.
//
// @param audio            Input samples; must hold at least two samples because the
//                         recursion is seeded from audio->at(0) and audio->at(1).
// @param sample_frequency Sampling rate used to size the mean-removal window.
// @return Epoch indices in increasing order. The list always starts with 0 and ends with
//         audio_size - 1, regardless of whether those positions are zero crossings.
std::vector<int> extract_epoch_indices(std::shared_ptr<univector<f32>> audio, double sample_frequency) {
    const int window_length = int(0.015 * sample_frequency);
    const int audio_size = audio->size();

    // Stages one and two: difference the input and run it through the recursion twice.
    // Only the last two outputs of the first pass are needed, so they are kept in scalars.
    std::vector<double> y2(audio_size);
    const double x_0 = audio->at(0);
    const double x_1 = audio->at(1) - x_0;
    double y1_prev2 = x_0;
    double y1_prev1 = x_1 + (2.0 * y1_prev2);
    y2[0] = y1_prev2;
    y2[1] = y1_prev1 + (2.0 * y2[0]);
    for (int i = 2; i < audio_size; i++) {
        const double x_i = audio->at(i) - audio->at(i - 1);
        const double y1_i = x_i + (2.0 * y1_prev1) - y1_prev2;
        y2[i] = y1_i + (2.0 * y2[i - 1]) - y2[i - 2];
        y1_prev2 = y1_prev1;
        y1_prev1 = y1_i;
    }

    // Stages three and four: remove the local mean twice.
    const std::vector<double> y3 = subtract_local_mean(y2, window_length);
    const std::vector<double> y = subtract_local_mean(y3, window_length);

    // Last stage: negative-to-positive zero crossings are the epochs.
    std::vector<int> epochs;
    epochs.push_back(0);
    for (int i = 1; i < audio_size; ++i) {
        if (y[i - 1] < 0 && y[i] > 0) {
            epochs.push_back(i);
        }
    }
    epochs.push_back(audio_size - 1);
    return epochs;
}
// Builds the time-scaled signal in `synthesized_wav` by overlap-adding windowed frames of
// `audio` that start at the given epochs.
//
// For every epoch that still has `number_of_epochs_in_frame` epochs after it, the desired
// output length grows by int(hop * time_change_factor), where hop is the distance to the
// next epoch. A frame is written only while the output is not longer than that desired
// length; each written frame advances the write position by the hop of its epoch.
// The sum of the applied windows is tracked alongside and used at the end to normalise
// the output, with the divisor floored at 1e-4.
//
// @param audio                     Source samples.
// @param synthesized_wav           Output buffer; frames are appended to / added into it.
// @param epoch_indices             Epoch positions in `audio`; must not be empty.
// @param time_change_factor        Scale applied to each hop when growing the desired length.
// @param number_of_epochs_in_frame Number of epoch intervals spanned by one frame.
//
// NOTE(review): when the write position runs past the end of the output (for example with
// number_of_epochs_in_frame == 1), `overflow` can exceed the frame size and the insert
// ranges below start before begin() - confirm callers never hit this.
void time_stretch(std::shared_ptr<univector<f32>> audio, std::shared_ptr<univector<f32>> synthesized_wav, std::vector<int> epoch_indices, float time_change_factor, int number_of_epochs_in_frame) {
    const int epoch_count = epoch_indices.size();
    const int frame_start_limit = epoch_count - number_of_epochs_in_frame;
    univector<f32> window_sum;          // accumulated window weights, parallel to the output
    int write_offset = epoch_indices[0]; // where the next frame starts in the output
    int wanted_length = 0;               // output length requested so far

    for (int e = 0; e < frame_start_limit; ++e) {
        const int epoch_gap = epoch_indices[e + 1] - epoch_indices[e];

        if (wanted_length >= synthesized_wav->size()) {
            const int frame_length = epoch_indices[e + number_of_epochs_in_frame] - epoch_indices[e] - 1;
            univector<f32> taper = window_blackman<f32>(frame_length);
            univector<f32> frame = audio->slice(epoch_indices[e], frame_length) * taper;

            // Number of frame samples that fall beyond the current end of the output.
            const int overflow = int(frame.size() - synthesized_wav->size() + write_offset);
            if (overflow > 0) {
                // Grow both buffers by appending the frame's tail (and its window weights);
                // this must happen before the slices below are taken.
                synthesized_wav->insert(synthesized_wav->end(), frame.end() - overflow, frame.end());
                window_sum.insert(window_sum.end(), taper.end() - overflow, taper.end());
            }

            // Add the remaining head of the frame onto the part of the output that existed before the append.
            const int overlap = frame_length - overflow;
            synthesized_wav->slice(write_offset, overlap) += frame.slice(0, frame.size() - overflow);
            window_sum.slice(write_offset, overlap) += taper.slice(0, taper.size() - overflow);

            write_offset += epoch_gap;
        }

        wanted_length += int(epoch_gap * time_change_factor);
    }

    // Undo the window gain; the 1e-4 floor keeps the divisor away from zero.
    *synthesized_wav /= max(window_sum, univector<float>(window_sum.size(), 1e-4));
}
// Entry point: extracts the epochs of `input_audio` and hands them, together with the
// stretch parameters, to time_stretch, which writes the result into `output_audio`.
//
// @param input_audio               Source samples.
// @param output_audio              Buffer that receives the synthesized signal.
// @param time_change_factor        Forwarded unchanged to time_stretch.
// @param number_of_epochs_in_frame Forwarded unchanged to time_stretch.
// @param sample_frequency          Sampling rate, used for epoch extraction only.
void esola(std::shared_ptr<univector<f32>> input_audio, std::shared_ptr<univector<f32>> output_audio, float time_change_factor, int number_of_epochs_in_frame, double sample_frequency) {
    time_stretch(input_audio,
                 output_audio,
                 extract_epoch_indices(input_audio, sample_frequency),
                 time_change_factor,
                 number_of_epochs_in_frame);
}