Skip to content

Commit

Permalink
Add new benchmark for more complete results
Browse files Browse the repository at this point in the history
Still far from addressing #128, but still a nice improvement to the
status quo.

[ci skip]
  • Loading branch information
Morwenn committed Sep 19, 2020
1 parent 455910b commit 2a1ca17
Show file tree
Hide file tree
Showing 2 changed files with 239 additions and 0 deletions.
163 changes: 163 additions & 0 deletions benchmarks/errorbar-plot/bench.cpp
@@ -0,0 +1,163 @@
/*
* Copyright (c) 2020 Morwenn
* SPDX-License-Identifier: MIT
*/
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <fstream>
#include <iostream>
#include <iterator>
#include <ratio>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include <cpp-sort/sorters.h>
#include "distributions.h"

using namespace std::chrono_literals;

////////////////////////////////////////////////////////////
// Benchmark configuration variables

// Element type stored in the collections that get sorted
using value_t = int;
// Container type handed to every benchmarked algorithm
using collection_t = std::vector<value_t>;

// Algorithms to measure: each entry pairs a display name (also used to
// build the name of the result file) with the function called on the
// collection
using sort_f = void (*)(collection_t&);
std::pair<std::string, sort_f> sorts[] = {
    { "heap_sort",   cppsort::heap_sort },
    { "poplar_sort", cppsort::poplar_sort },
    { "smooth_sort", cppsort::smooth_sort }
};

// Generator used to fill the collections before each run
auto distribution = shuffled{};

// Smallest and largest collection sizes; the benchmark doubles the size
// at every step, starting from size_min
std::uint64_t size_min = std::uint64_t{1} << 1;
std::uint64_t size_max = std::uint64_t{1} << 19;

// Time budget for a single size: no new run is started for that size
// once this much time has elapsed
std::chrono::seconds max_run_time{60};
// Upper bound on the number of runs measured for a single size
std::size_t max_runs_per_size{25};


////////////////////////////////////////////////////////////
// Utility functions

// Returns a copy of `filename` in which every character from the set
// / \ : * ? " < > | has been replaced by an underscore, so that the
// result can be used as a file name. All other characters are kept.
auto safe_file_name(std::string filename)
    -> std::string
{
    static constexpr char forbidden[] = "/\\:*?\"<>|";

    // Jump from one forbidden character to the next and overwrite each
    for (auto pos = filename.find_first_of(forbidden);
         pos != std::string::npos;
         pos = filename.find_first_of(forbidden, pos + 1)) {
        filename[pos] = '_';
    }

    return filename;
}

// Returns the arithmetic mean of `diffs`, or 0.0 when `diffs` is empty.
// Every sample is divided by the number of samples before being added
// to the running total.
auto average(const std::vector<double>& diffs)
    -> double
{
    const double count = double(diffs.size());
    double mean = 0.0;
    std::for_each(diffs.begin(), diffs.end(), [&](double sample) {
        mean += sample / count;
    });
    return mean;
}

// Returns the population standard deviation of `diffs` around `avg`:
// the square root of the mean of the squared deviations (divisor is the
// number of samples, not that number minus one). Returns 0.0 when
// `diffs` is empty.
auto standard_deviation(const std::vector<double>& diffs, double avg)
    -> double
{
    const double count = double(diffs.size());
    double variance = 0.0;
    std::for_each(diffs.begin(), diffs.end(), [&](double sample) {
        variance += (sample - avg) * (sample - avg) / count;
    });
    return std::sqrt(variance);
}


////////////////////////////////////////////////////////////
// Benchmark code proper

// Entry point of the benchmark.
//
// For every algorithm in `sorts` and every size from `size_min` to
// `size_max` (doubling at each step), repeatedly fills a fresh collection
// with `distribution`, times the sort, and emits one line per size:
//
//     <index>, <size>, <mean time in ms>, <standard deviation in ms>
//
// Each line is written both to <output directory>/<algorithm name>.csv
// (whose first line is the algorithm name) and to the standard output.
//
// argv[1], when given, is the output directory; it defaults to ".".
int main(int argc, char** argv)
{
    // Choose the output directory
    std::string output_directory = ".";
    if (argc > 1) {
        output_directory = argv[1];
    }

    // Always use a steady clock
    using clock_type = std::conditional_t<
        std::chrono::high_resolution_clock::is_steady,
        std::chrono::high_resolution_clock,
        std::chrono::steady_clock
    >;

    // Poor seed, yet enough for our benchmarks; the conversion from
    // time_t is spelled out because it may narrow
    auto seed = static_cast<std::uint_fast32_t>(std::time(nullptr));
    std::cout << "SEED: " << seed << '\n';

    for (auto& sort: sorts) {
        // Create a file to store the results
        std::string output_filename = output_directory + '/' + safe_file_name(sort.first) + ".csv";
        std::ofstream output_file(output_filename);
        if (!output_file) {
            // Do not lose a long benchmark run silently: the numbers are
            // still printed to the standard output below, so keep going
            std::cerr << "warning: could not open '" << output_filename
                      << "' for writing, results will only be printed\n";
        }
        output_file << sort.first << '\n';
        std::cout << sort.first << '\n';

        // Seed the distribution manually to ensure that all algorithms
        // sort the same collections when there is randomness
        distributions_prng.seed(seed);

        // Sort the collection as long as needed
        // Index of the current size, for logs: starts at 0 and grows by one
        // per size, so it only equals log2(size) when size_min is 1
        std::uint64_t pow_of_2 = 0;
        for (auto size = size_min ; size <= size_max ; size <<= 1) {
            // Duration of each individual run, in milliseconds
            std::vector<double> times;

            auto total_start = clock_type::now();
            auto total_end = clock_type::now();
            while (std::chrono::duration_cast<std::chrono::seconds>(total_end - total_start) < max_run_time &&
                   times.size() < max_runs_per_size) {
                // Only the call to the sorting algorithm is timed, the
                // generation of the collection is not
                collection_t collection;
                distribution(std::back_inserter(collection), size);
                auto start = clock_type::now();
                sort.second(collection);
                auto end = clock_type::now();
                assert(std::is_sorted(std::begin(collection), std::end(collection)));
                times.push_back(std::chrono::duration<double, std::milli>(end - start).count());
                total_end = clock_type::now();
            }

            // Compute and display stats & numbers
            double avg = average(times);

            // Format the line once so that the file and the console
            // receive exactly the same text
            std::ostringstream ss;
            ss << pow_of_2++ << ", "
               << size << ", "
               << avg << ", "
               << standard_deviation(times, avg) << '\n';
            output_file << ss.str();
            std::cout << ss.str();

            // Abort if the allocated time was merely enough to benchmark a single run
            if (times.size() < 2) break;
        }
    }
}
76 changes: 76 additions & 0 deletions benchmarks/errorbar-plot/plot.py
@@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-

# Copyright (c) 2020 Morwenn
# SPDX-License-Identifier: MIT

import argparse
import pathlib
import sys

import numpy
from matplotlib import pyplot


def main():
    """Plot the benchmark results stored in a directory of CSV files.

    The directory is given as the positional command-line argument
    ``root``; ``--alternative-palette`` selects a rainbow colour map
    instead of the default colourblind-friendly palette.

    Every ``*.csv`` file in the directory is expected to hold the
    algorithm name on its first line, followed by comma-separated rows
    of four columns: index, collection size, average time and standard
    deviation. Times are divided by 1000 before plotting to go from
    milliseconds to seconds. One error-bar curve is drawn per file, on
    a log-log graph whose x axis uses base 2, and the figure is shown
    in an interactive window.

    Exits with status 1 when the directory contains no CSV file.
    """
    # Local import: only this function needs it
    import itertools

    parser = argparse.ArgumentParser(description="Plot the benchmark results.")
    parser.add_argument('root', help="directory with the result files to plot")
    parser.add_argument('--alternative-palette', dest='use_alt_palette',
                        action='store_true', default=False,
                        help="Use another color palette")
    args = parser.parse_args()

    root = pathlib.Path(args.root)
    # Sorted so that colours, markers and legend order do not depend on
    # the order in which the filesystem lists the files
    result_files = sorted(root.glob('*.csv'))
    if len(result_files) == 0:
        print(f"There are no files to plot in {root}")
        sys.exit(1)

    # Choose the colour palette and markers to use
    if args.use_alt_palette:
        # That one has the advantage of providing one colour per file
        palette = pyplot.cm.rainbow(numpy.linspace(0, 1, len(result_files)))
    else:
        # Colorblind-friendly palette (https://gist.github.com/thriveth/8560036)
        palette = ['#377eb8', '#ff7f00', '#4daf4a',
                   '#f781bf', '#a65628', '#984ea3',
                   '#999999', '#e41a1c', '#dede00']
    # Cycle instead of iterating once: with more files than colours or
    # markers, a plain iterator would raise StopIteration
    colors = itertools.cycle(palette)
    markers = itertools.cycle(['o', '^', 's', 'p', 'P', '*', 'H', 'X', 'D', 'v'])

    ax = pyplot.gca()

    for result_file in result_files:
        with result_file.open() as fd:
            # Read the first line
            algo_name = fd.readline().strip()
            # Read the rest of the file
            data = numpy.genfromtxt(fd, delimiter=',').transpose()
            pow_of_2, size, avg, stddev = data

        # Add result to graph
        ax.errorbar(
            size,
            avg / 1000.0,
            yerr=stddev / 1000.0,
            label=algo_name,
            color=next(colors),
            marker=next(markers)
        )

    ax.grid(True)
    ax.set_xlabel('Size')
    ax.set_ylabel('Time [s]')
    try:
        ax.set_xscale('log', base=2)
    except (TypeError, ValueError):
        # NOTE(review): older Matplotlib releases are expected to reject
        # `base` and only accept the axis-specific `basex` spelling, which
        # newer releases removed - confirm against the supported versions
        ax.set_xscale('log', basex=2)
    ax.set_yscale('log')
    # Leave some room on both sides of the first and last sizes
    pyplot.xlim(pyplot.xlim()[0] / 2, pyplot.xlim()[1] * 2)

    pyplot.title("Sorting std::vector<int>")
    pyplot.legend(loc='best')

    figure = pyplot.gcf()
    figure.set_size_inches(10, 6)

    pyplot.show()


# Only plot when the file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()

0 comments on commit 2a1ca17

Please sign in to comment.