-
Notifications
You must be signed in to change notification settings - Fork 0
/
algorithm.cpp
436 lines (390 loc) · 15.2 KB
/
algorithm.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
#include <iostream>
#include <cstdlib>
#include <algorithm>
#include <iostream>
#include <random>
#include <vector>
#include <cmath>
#include <ctime>
#include <vector>
#include <unordered_map>
#include <algorithm>
#include <cassert>
#include <chrono>
#include <boost/lexical_cast.hpp>
#include "util.hpp"
//-------------------------------------------------------------
#define SORTING 0
#define RADIX 1
#define CDF 2
#define TEST 3
//-------------------------------------------------------------
using namespace std::chrono;
using hash_coord = std::pair<uint64_t, bool>;
using cdf_coord = util::cdf_coord;
using sumCoord = util::sumCoord;
using Coord = util::Coord;
//-------------------------------------------------------------
static uint32_t OPTION;
static uint64_t MAX_VALUE;
static uint64_t DEVIATION;
static uint32_t MAX_N;
static uint64_t MAX_VALUE_X;
static uint64_t MAX_VALUE_Y;
//-------------------------------------------------------------
auto randomNumberBetween = [](uint64_t low, uint64_t high) {
auto randomFunc = [distribution_ = std::uniform_int_distribution<uint64_t>(low, high), random_engine_ = std::mt19937{std::random_device{}()}]() mutable {
return distribution_(random_engine_);
};
return randomFunc;
};
//-------------------------------------------------------------
static std::vector<uint64_t> generator(uint64_t maxValue)
// generates a sorted array of MAX_N unsigned ints without duplicates
{
std::vector<uint64_t> numbers;
std::generate_n(std::back_inserter(numbers), 10000, randomNumberBetween(1, maxValue));
std::sort(std::begin(numbers), std::end(numbers));
return numbers;
}
//-------------------------------------------------------------
void applyFrequencies(std::unordered_map<uint64_t, hash_coord>& freqHashTable, std::vector<cdf_coord>& cdf)
// Update cdf with the frequencies from the hash table
{
uint64_t partialSum = 0;
for (unsigned index = 0, limit = cdf.size(); index < limit; ++index) {
uint64_t currSum = freqHashTable[cdf[index].first].first;
partialSum += currSum;
cdf[index].second = partialSum;
}
}
//-------------------------------------------------------------
std::unordered_map<uint64_t, hash_coord> computeFreqHashTable(std::vector<uint64_t>& xs, std::vector<uint64_t>& ys)
// Return the hash table with the frequencies of the resulted sums
// This is used by all algorithms
{
// Hash with the frequencies of each sum
assert(xs.size() == ys.size());
std::unordered_map<uint64_t, hash_coord> count_;
for (unsigned index = 0; index < xs.size(); ++index) {
for (unsigned ptr = 0; ptr < ys.size(); ++ptr) {
// TODO: assuming that the value is still a 32-bit unsigned value
// This serves only to the theoretical part of the algorithm
uint64_t sum = xs[index] + ys[ptr];
++count_[sum].first;
}
}
return count_;
}
//-------------------------------------------------------------
std::vector<cdf_coord> preprocessing_sorting(std::unordered_map<uint64_t, hash_coord>& count_, std::vector<uint64_t>& xs, std::vector<uint64_t>& ys)
// Solve the task with sorting
{
std::vector<cdf_coord> final_(count_.size());
unsigned index = 0;
for (auto elem: count_)
final_[index++] = std::make_pair(elem.first, 0);
return final_;
}
//-------------------------------------------------------------
void solveWithSorting(std::vector<cdf_coord>& cdf)
{
auto start = high_resolution_clock::now();
std::sort(cdf.begin(), cdf.end(), [](auto left, auto right) {
return left.first < right.first;
});
auto stop = high_resolution_clock::now();
std::cout << "Solving with Sorting took: " << duration_cast<milliseconds>(stop - start).count() << " ms" << std::endl;
}
//-------------------------------------------------------------
typedef struct {
uint64_t sum, next;
} cell;
//-------------------------------------------------------------
std::vector<uint64_t> adj;
std::vector<cell> list;
//-------------------------------------------------------------
void addToChain(uint64_t ptr, uint64_t sum, uint64_t index)
// add in the hash-table the sum
{
list[ptr].sum = sum;
list[ptr].next = adj[index];
adj[index] = ptr;
}
//-------------------------------------------------------------
std::vector<uint64_t> collectChain(uint64_t index)
// Collect the chain into a vector
{
std::vector<uint64_t> ret;
for (uint32_t ptr = adj[index]; ptr; ptr = list[ptr].next)
ret.push_back(list[ptr].sum);
return ret;
}
//-------------------------------------------------------------
std::vector<cdf_coord> preprocessing_cdf(std::unordered_map<uint64_t, hash_coord>& count_, std::vector<uint64_t>& xs, std::vector<uint64_t>& ys)
// Solve with the cdf approximation
{
// Construct an artificial spline
assert(xs.size() == ys.size());
std::vector<cdf_coord> spline(xs.size());
for (unsigned index = 0; index < xs.size(); ++index)
spline[index].first = xs[index] + ys[index];
auto forward_pointer = [&spline](uint32_t& knotPtr, uint64_t sum) {
// Iterate through the spline knots and find the segment of sum
while ((knotPtr < spline.size()) && (spline[knotPtr].first <= sum))
++knotPtr;
// Special case when reaching the last element
knotPtr -= (knotPtr == spline.size());
};
// Hash with the frequencies of each sum
unsigned sums_counter = count_.size();
for (unsigned index = 0; index < xs.size(); ++index) {
// Reset the pointer to the splien knot at every new line
unsigned knotPtr = 0;
for (unsigned ptr = 0; ptr < ys.size(); ++ptr) {
// TODO: assuming that the value is still a 32-bit unsigned value
// This serves only to the theoretical part of the algorithm
uint64_t sum = xs[index] + ys[ptr];
// Forward the pointer in the spline knots
forward_pointer(knotPtr, sum);
// First occurence of sum? Then update the "Mars Smen"
if (count_[sum].second == false) {
++spline[knotPtr].second;
count_[sum].second = true;
}
}
}
// Update the "Mars Smen"
for (unsigned index = 1; index < spline.size(); ++index)
spline[index].second += spline[index - 1].second;
#if 0
std::cerr << "Final Spline : " << std::endl;
for (auto elem: spline)
std::cerr << elem.first << " " << elem.second << std::endl;
#endif
auto interpolate = [&spline](uint32_t knotPtr, uint64_t sum) {
// Compute the slope
double dx = spline[knotPtr].first - spline[knotPtr - 1].first;
double dy = spline[knotPtr].second - spline[knotPtr - 1].second;
// Compute offset for the linear function
double ofs = sum - spline[knotPtr - 1].first;
return spline[knotPtr - 1].second + ofs * (dy / dx);
};
#if 0
std::cerr << sums_counter << " out of " << xs.size() * ys.size() << std::endl;
#endif
// Compute the CDF approximation
// Create the hash table
uint32_t sums_size = sums_counter + 1;
adj.resize(sums_size);
list.resize(sums_size + 1);
sums_counter = 0;
for (unsigned index = 0; index < xs.size(); ++index) {
unsigned knotPtr = 0;
for (unsigned ptr = 0; ptr < ys.size(); ++ptr) {
// TODO: assuming that the value is stil a 32-bit unsigned value
// This serves only to the theoretical part of the algorithm
uint64_t sum = xs[index] + ys[ptr];
// Forward the pointer in the spline knots
forward_pointer(knotPtr, sum);
// First occurence of sum? (we already used it before, so it changes the meaning)
if (count_[sum].second == true) {
double estimation = interpolate(knotPtr, sum);
uint32_t estimatedIndex = static_cast<uint32_t>(round(estimation));
addToChain(++sums_counter, sum, estimatedIndex);
// Mark the sum as seen
count_[sum].second = false;
}
}
}
std::cerr << "Chain done" << std::endl;
// Functions to add the values into the final order
unsigned currBuffer = 0;
std::vector<cdf_coord> final_(sums_counter);
auto addKeyToFinal = [&final_, &currBuffer](uint64_t key) {
final_[currBuffer++] = std::make_pair(key, 0);
};
auto addChain = [addKeyToFinal](std::vector<uint64_t>& values) {
for (auto elem: values)
addKeyToFinal(elem);
};
// Go through the chains from the hash-table
std::vector<uint64_t> collector_;
for (unsigned index = 0; index < sums_size; ++index) {
// TODO: try also with sorting the chain
collector_ = collectChain(index);
// And insert each element of the chain
addChain(collector_);
}
std::cerr << "Sums = " << sums_size << std::endl;
#if 0
std::cerr << currBuffer << " vs " << sums_size << std::endl;
assert(currBuffer == sums_size);
#endif
std::cerr << "Final done" << std::endl;
#if 0
util::printComputedCdf("final", final_);
#endif
return final_;
}
//-------------------------------------------------------------
void solveWithCdf(std::vector<cdf_coord>& cdf)
// Insertion sort for partially sorted cdf
{
auto start = high_resolution_clock::now();
int size = cdf.size();
for (unsigned index = 1, limit = cdf.size(); index < limit; ++index) {
unsigned ptr = index;
cdf_coord curr = cdf[index];
while ((ptr > 0) && (cdf[ptr - 1].first > curr.first)) {
cdf[ptr] = cdf[ptr - 1];
--ptr;
}
cdf[ptr] = curr;
}
auto stop = high_resolution_clock::now();
std::cout << "Solving with CDF approximation took: " << duration_cast<milliseconds>(stop - start).count() << " ms" << std::endl;
}
//-------------------------------------------------------------
std::vector<cdf_coord> chooseAlgorithm(uint32_t option)
// Benchmark the different algorithms, by returning the CDF of the sorting.
{
// Final result - the CDF of the sorting
std::vector<cdf_coord> ret;
// Create the input
std::vector<uint64_t> xs = generator(MAX_VALUE_X);
std::vector<uint64_t> ys = generator(MAX_VALUE_Y);
auto printVector = [&](std::string msg, const std::vector<uint64_t>& arr) {
std::cout << msg << std::endl;
for (auto v : arr)
std::cout << v << " ";
std::cout << std::endl;
};
#if 0
printVector("X", xs);
printVector("Y", ys);
#endif
std::unordered_map<uint64_t, hash_coord> count_ = computeFreqHashTable(xs, ys);
// Analyze options
switch (option) {
// Sort with a comparison-based sorting algorithm (N^2 log N)
// Sort with radix sort (N ^ 2)
case RADIX : {
assert(("Radix not yet supported", 0));
#if 0
cdf = solveWithRadix(xs, ys);
#endif
break;
}
case TEST : {
std::cerr << "Testing" << std::endl;
}
case SORTING : {
#if 1
auto start = high_resolution_clock::now();
ret = preprocessing_sorting(count_, xs, ys);
solveWithSorting(ret);
applyFrequencies(count_, ret);
auto stop = high_resolution_clock::now();
std::cout << "Sorting (preprocessing + solve) took: " << duration_cast<milliseconds>(stop - start).count() << " ms" << std::endl;
#endif
if (option != TEST)
break;
}
// Sort with the CDF approximation (N ^ 2)
case CDF : {
auto start = high_resolution_clock::now();
ret = preprocessing_cdf(count_, xs, ys);
solveWithCdf(ret);
applyFrequencies(count_, ret);
auto stop = high_resolution_clock::now();
std::cout << "CDF approximation (preprocessing + solve) took: " << duration_cast<milliseconds>(stop - start).count() << " ms" << std::endl;
break;
}
default : {
std::cerr << "Option " << option << " not (yet) available" << std::endl;
}
}
#if 0
util::printComputedCdf("Choose algorithm", cdf);
#endif
std::cerr << "Is cdf? " << util::is_cdf(xs, ys, ret) << std::endl;
return ret;
}
//-------------------------------------------------------------
int main(int argc, char** argv) {
srand(time(NULL));
if (argc != 5) {
std::cerr << "Usage: " << argv[0] << " [OPTION] [MAX_N] [MAX_VALUE_X] [MAX_VALUE_Y]" << std::endl;
return -1;
}
OPTION = atoi(argv[1]);
MAX_N = atoi(argv[2]);
double d = boost::lexical_cast<double>(argv[3]);
MAX_VALUE_X = static_cast<uint64_t>(d);
d = boost::lexical_cast<double>(argv[4]);
MAX_VALUE_Y = static_cast<uint64_t>(d);
std::cerr << "check " << MAX_VALUE_X << " and " << MAX_VALUE_Y << std::endl;
std::vector<cdf_coord> ret = chooseAlgorithm(OPTION);
#if 0
printVector("X", xs);
printVector("Y", ys);
#endif
#if 0
std::unordered_map<uint32_t, bool> seen;
sums.reserve(xs.size() * ys.size());
for (unsigned index = 0; index < xs.size(); ++index) {
for (unsigned ptr = 0; ptr < ys.size(); ++ptr) {
uint32_t sum = xs[index] + ys[ptr];
if (seen[sum])
continue;
seen[sum] = true;
sums.push_back(std::make_pair(xs[index] + ys[ptr], std::make_pair(index, ptr)));
}
}
#if 1
std::sort(sums.begin(), sums.end(), [](auto left, auto right) {
return left.first < right.first;
});
#endif
#endif
#if 0
cdf = util::buildCdf<sumCoord>(sums);
spline = buildArtificial("artificial", xs, ys);
util::printErrors(cdf, buildArtificial("artificial", xs, ys));
std::cerr << "First point of spline: (" << spline.front().first << "," << spline.front().second << ") and last point of spline: (" << spline.back().first << "," << spline.back().second << ")" << std::endl;
#endif
#if 0
for (auto elem: spline) {
std::cerr << elem.first << " " << elem.second << std::endl;
}
#endif
#if 0
adj.resize(sums.size(), 0);
list.resize(sums.size() + 1); // the first buffer is not used
for (auto elem: sums) {
uint32_t currSum = elem.first;
double estimation = interpolate(currSum);
uint32_t index = static_cast<uint32_t>(round(estimation));
index = index % sums.size();
addSum(currSum, index);
}
std::cerr << "alles gut hier" << std::endl;
unsigned currPos = 0;
double avgLen = 0, maxLen = 0;
std::cerr << "before resize with " << sums.size() << std::endl;
final_.resize(sums.size());
std::cerr << "after resize" << std::endl;
for (unsigned index = 0; index < sums.size(); ++index) {
// std::cerr << "Progress: " << index << std::endl;
std::vector<uint32_t> currChain = computeChain(index);
if (!currChain.empty()) {
for (auto elem: currChain)
insert(currPos++, elem);
}
}
assert(currPos == sums.size());
std::cerr << "Is sorted? " << std::is_sorted(final_.begin(), final_.end()) << std::endl;
#endif
return 0;
}