Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new benchmark for more complete results
Still far from addressing #128, but still a nice improvement to the status quo. [ci skip]
- Loading branch information
Showing
2 changed files
with
239 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
/* | ||
* Copyright (c) 2020 Morwenn | ||
* SPDX-License-Identifier: MIT | ||
*/ | ||
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <fstream>
#include <iostream>
#include <iterator>
#include <ratio>
#include <sstream>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include <cpp-sort/sorters.h>
#include "distributions.h"
|
||
using namespace std::chrono_literals; | ||
|
||
//////////////////////////////////////////////////////////// | ||
// Benchmark configuration variables | ||
|
||
// Type of data to sort during the benchmark | ||
using value_t = int; | ||
// Type of collection to sort | ||
using collection_t = std::vector<value_t>; | ||
|
||
// Sorting algorithms to benchmark | ||
using sort_f = void (*)(collection_t&); | ||
std::pair<std::string, sort_f> sorts[] = { | ||
{ "heap_sort", cppsort::heap_sort }, | ||
{ "poplar_sort", cppsort::poplar_sort }, | ||
{ "smooth_sort", cppsort::smooth_sort } | ||
}; | ||
|
||
// Distribution to benchmark against | ||
auto distribution = shuffled{}; | ||
|
||
// Sizes of the collections to sort | ||
std::uint64_t size_min = 1u << 1; | ||
std::uint64_t size_max = 1u << 19; | ||
|
||
// Maximum time to let the benchmark run for a given size before giving up | ||
auto max_run_time = 60s; | ||
// Maximum number of benchmark runs per size | ||
std::size_t max_runs_per_size = 25; | ||
|
||
|
||
//////////////////////////////////////////////////////////// | ||
// Utility functions | ||
|
||
// Returns a copy of filename where each of the characters
// / \ : * ? " < > | is replaced with an underscore, so that the result
// can be used as a file name. Every other character is left untouched.
auto safe_file_name(std::string filename)
    -> std::string
{
    static constexpr char invalid_characters[] = {'/', '\\', ':', '*', '?', '"', '<', '>', '|'};

    // Replace characters to make a filename usable
    std::replace_if(filename.begin(), filename.end(), [](char character) {
        return std::find(std::begin(invalid_characters),
                         std::end(invalid_characters),
                         character) != std::end(invalid_characters);
    }, '_');

    return filename;
}
|
||
// Returns the arithmetic mean of the values in diffs, or 0.0 when diffs
// is empty.
auto average(const std::vector<double>& diffs)
    -> double
{
    // The element count does not change during the loop: convert it once
    const double count = static_cast<double>(diffs.size());

    // Each value is divided before being accumulated rather than dividing
    // the total once at the end
    double avg = 0.0;
    for (const double value : diffs) {
        avg += value / count;
    }
    return avg;
}
|
||
// Returns the population standard deviation of the values in diffs (the
// squared deviations are divided by the number of values, not by the
// number of values minus one), or 0.0 when diffs is empty.
//
// avg is the mean the deviations are measured against; callers are
// expected to pass the result of average(diffs).
auto standard_deviation(const std::vector<double>& diffs, double avg)
    -> double
{
    // The element count does not change during the loop: convert it once
    const double count = static_cast<double>(diffs.size());

    double variance = 0.0;
    for (const double value : diffs) {
        const double deviation = value - avg;
        variance += deviation * deviation / count;
    }
    return std::sqrt(variance);
}
|
||
|
||
//////////////////////////////////////////////////////////// | ||
// Benchmark code proper | ||
|
||
int main(int argc, char** argv) | ||
{ | ||
// Choose the output directory | ||
std::string output_directory = "."; | ||
if (argc > 1) { | ||
output_directory = argv[1]; | ||
} | ||
|
||
// Always use a steady clock | ||
using clock_type = std::conditional_t< | ||
std::chrono::high_resolution_clock::is_steady, | ||
std::chrono::high_resolution_clock, | ||
std::chrono::steady_clock | ||
>; | ||
|
||
// Poor seed, yet enough for our benchmarks | ||
std::uint_fast32_t seed = std::time(nullptr); | ||
std::cout << "SEED: " << seed << '\n'; | ||
|
||
for (auto& sort: sorts) { | ||
// Create a file to store the results | ||
std::string output_filename = output_directory + '/' + safe_file_name(sort.first) + ".csv"; | ||
std::ofstream output_file(output_filename); | ||
output_file << sort.first << '\n'; | ||
std::cout << sort.first << '\n'; | ||
|
||
// Seed the distribution manually to ensure that all algorithms | ||
// sort the same collections when there is randomness | ||
distributions_prng.seed(seed); | ||
|
||
// Sort the collection as long as needed | ||
std::uint64_t pow_of_2 = 0; // For logs | ||
for (auto size = size_min ; size <= size_max ; size <<= 1) { | ||
std::vector<double> times; | ||
|
||
auto total_start = clock_type::now(); | ||
auto total_end = clock_type::now(); | ||
while (std::chrono::duration_cast<std::chrono::seconds>(total_end - total_start) < max_run_time && | ||
times.size() < max_runs_per_size) { | ||
collection_t collection; | ||
distribution(std::back_inserter(collection), size); | ||
auto start = clock_type::now(); | ||
sort.second(collection); | ||
auto end = clock_type::now(); | ||
assert(std::is_sorted(std::begin(collection), std::end(collection))); | ||
times.push_back(std::chrono::duration<double, std::milli>(end - start).count()); | ||
total_end = clock_type::now(); | ||
} | ||
|
||
// Compute and display stats & numbers | ||
double avg = average(times); | ||
|
||
std::ostringstream ss; | ||
ss << pow_of_2++ << ", " | ||
<< size << ", " | ||
<< avg << ", " | ||
<< standard_deviation(times, avg) << '\n'; | ||
output_file << ss.str(); | ||
std::cout << ss.str(); | ||
|
||
// Abort if the allocated time was merely enough to benchmark a single run | ||
if (times.size() < 2) break; | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
# Copyright (c) 2020 Morwenn | ||
# SPDX-License-Identifier: MIT | ||
|
||
import argparse | ||
import pathlib | ||
import sys | ||
|
||
import numpy | ||
from matplotlib import pyplot | ||
|
||
|
||
def main():
    """Plot the benchmark results found in a directory.

    Command line arguments:
        root: directory containing the ``*.csv`` result files to plot.
        --alternative-palette: use a rainbow palette with one colour per
            result file instead of the default colorblind-friendly one.

    Every result file is expected to hold the algorithm name on its first
    line, followed by comma-separated rows of four columns: power of two,
    size, average time and standard deviation. Times are divided by 1000
    before being plotted on an axis labelled in seconds.

    Exits with status 1 when the directory contains no result file.
    """
    import itertools

    parser = argparse.ArgumentParser(description="Plot the benchmark results.")
    parser.add_argument('root', help="directory with the result files to plot")
    parser.add_argument('--alternative-palette', dest='use_alt_palette',
                        action='store_true', default=False,
                        help="Use another color palette")
    args = parser.parse_args()

    root = pathlib.Path(args.root)
    # glob() yields files in arbitrary order: sort them so that a given
    # algorithm gets the same colour and marker from one run to the next
    result_files = sorted(root.glob('*.csv'))
    if len(result_files) == 0:
        print(f"There are no files to plot in {root}")
        sys.exit(1)

    # Choose the colour palette and markers to use
    if args.use_alt_palette:
        # That one has the advantage of providing one colour per file
        palette = pyplot.cm.rainbow(numpy.linspace(0, 1, len(result_files)))
    else:
        # Colorblind-friendly palette (https://gist.github.com/thriveth/8560036)
        palette = ['#377eb8', '#ff7f00', '#4daf4a',
                   '#f781bf', '#a65628', '#984ea3',
                   '#999999', '#e41a1c', '#dede00']
    # Cycle through colours and markers instead of failing with
    # StopIteration when there are more result files than entries
    colors = itertools.cycle(palette)
    markers = itertools.cycle(['o', '^', 's', 'p', 'P', '*', 'H', 'X', 'D', 'v'])

    ax = pyplot.gca()

    for result_file in result_files:
        with result_file.open() as fd:
            # Read the first line
            algo_name = fd.readline().strip()
            # Read the rest of the file, one array per column
            data = numpy.genfromtxt(fd, delimiter=',').transpose()
            _, size, avg, stddev = data

        # Add result to graph
        ax.errorbar(
            size,
            avg / 1000.0,
            yerr=stddev / 1000.0,
            label=algo_name,
            color=next(colors),
            marker=next(markers)
        )

    ax.grid(True)
    ax.set_xlabel('Size')
    ax.set_ylabel('Time [s]')
    # 'basex' was deprecated in matplotlib 3.3 and removed in 3.5: the
    # keyword is now 'base'
    ax.set_xscale('log', base=2)
    ax.set_yscale('log')
    # Leave some room on both sides of the outermost points
    pyplot.xlim(pyplot.xlim()[0] / 2, pyplot.xlim()[1] * 2)

    pyplot.title("Sorting std::vector<int>")
    pyplot.legend(loc='best')

    figure = pyplot.gcf()
    figure.set_size_inches(10, 6)

    pyplot.show()


if __name__ == '__main__':
    main()