Merge pull request #215 from Morwenn/develop

Release 1.14.0
Morwenn · Dec 17, 2022 · 29b593a · 29b593a
2 parents 23424cc + cbad910
commit 29b593a
Show file tree

Hide file tree

Showing 126 changed files with 2,283 additions and 1,015 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -5,7 +5,7 @@ cmake_minimum_required(VERSION 3.8.0)
 
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
 
-project(cpp-sort VERSION 1.13.2 LANGUAGES CXX)
+project(cpp-sort VERSION 1.14.0 LANGUAGES CXX)
 
 include(CMakePackageConfigHelpers)
 include(GNUInstallDirs)

diff --git a/NOTICE.txt b/NOTICE.txt
@@ -75,6 +75,16 @@ In addition, certain files include the notices provided below.
 
 ----------------------
 
+// boost heap: d-ary heap as container adaptor
+//
+// Copyright (C) 2010 Tim Blechmann
+//
+// Distributed under the Boost Software License, Version 1.0. (See
+// accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+----------------------
+
 //----------------------------------------------------------------------------
 /// @file merge.hpp
 /// @brief low level merge functions

diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 ![cpp-sort logo](docs/images/cpp-sort-logo.svg)
 
-[![Latest Release](https://img.shields.io/badge/release-1.13.2-blue.svg)](https://github.com/Morwenn/cpp-sort/releases/tag/1.13.2)
-[![Conan Package](https://img.shields.io/badge/conan-cpp--sort%2F1.13.2-blue.svg)](https://conan.io/center/cpp-sort?version=1.13.2)
+[![Latest Release](https://img.shields.io/badge/release-1.14.0-blue.svg)](https://github.com/Morwenn/cpp-sort/releases/tag/1.14.0)
+[![Conan Package](https://img.shields.io/badge/conan-cpp--sort%2F1.14.0-blue.svg)](https://conan.io/center/cpp-sort?version=1.14.0)
 [![Code Coverage](https://codecov.io/gh/Morwenn/cpp-sort/branch/develop/graph/badge.svg)](https://codecov.io/gh/Morwenn/cpp-sort)
 [![Pitchfork Layout](https://img.shields.io/badge/standard-PFL-orange.svg)](https://github.com/vector-of-bool/pitchfork)
 
@@ -98,16 +98,22 @@ and extending **cpp-sort** in [the wiki](https://github.com/Morwenn/cpp-sort/wik
 # Benchmarks
 
 The following graph has been generated with a script found in the benchmarks
-directory. It shows the time needed for a sorting algorithm to sort one million
-shuffled `std::array<int, N>` of sizes 0 to 32. It compares the sorters generally
-used to sort small arrays:
+directory. It shows the time needed for [`heap_sort`][heap-sorter] to sort one
+million elements without being adapted, then when it is adapted with either
+[`drop_merge_adapter`][drop-merge-adapter] or [`split_adapter`][split-adapter].
 
-![Benchmark speed of small sorts with increasing size for std::array<int>](https://i.imgur.com/dOa3vyl.png)
+![Graph showing the speed difference between heap_sort raw, then adapted with
+split_adapter and drop_merge_adapter, when the number of inversions in the
+std::vector<int> to sort increases](https://i.imgur.com/IcjUkYF.png)
 
-These results were generated with MinGW-w64 g++ 10.1 with the compiler options
-`-std=c++2a -O3 -march=native`. That benchmark is merely an example to make this
-introduction look good. You can find more commented benchmarks in the [dedicated
-wiki page](https://github.com/Morwenn/cpp-sort/wiki/Benchmarks).
+As can be seen above, wrapping `heap_sort` with either of the adapters makes it
+[*adaptive*][adaptive-sort] to the number of inversions in a non-intrusive
+manner. The algorithms used to adapt it have different pros and cons; it is up
+to you to choose which one to use.
+
+This benchmark is mostly there to show the possibilities offered by the
+library. You can find more such commented benchmarks in the [dedicated wiki
+page][benchmarks].
 
 # Compiler support & tooling
 
@@ -156,7 +162,14 @@ parts of the benchmarks come from there as well.
 of a Timsort](https://github.com/gfx/cpp-TimSort).
 
 * The three algorithms used by `spread_sorter` come from Steven Ross [Boost.Sort
-module](https://www.boost.org/doc/libs/1_71_0/libs/sort/doc/html/index.html).
+module](https://www.boost.org/doc/libs/1_80_0/libs/sort/doc/html/index.html).
+
+* The algorithm used by `d_ary_heap_sorter` comes from Tim Blechmann's
+[Boost.Heap module](https://www.boost.org/doc/libs/1_80_0/doc/html/heap.html).
+
+* The algorithm used by `spin_sorter` comes from the eponymous algorithm implemented
+in [Boost.Sort](https://www.boost.org/doc/libs/1_80_0/libs/sort/doc/html/index.html)
+by Francisco Jose Tapia.
 
 * [`utility::as_function`](https://github.com/Morwenn/cpp-sort/wiki/Miscellaneous-utilities#as_function),
 [`utility::static_const`](https://github.com/Morwenn/cpp-sort/wiki/Miscellaneous-utilities#static_const),
@@ -227,3 +240,10 @@ and [Crascit/DownloadProject](https://github.com/Crascit/DownloadProject).
 
 * Some of the benchmarks use a [colorblind-friendly palette](https://gist.github.com/thriveth/8560036)
 developed by Thøger Rivera-Thorsen.
+
+
+  [adaptive-sort]: https://en.wikipedia.org/wiki/Adaptive_sort
+  [benchmarks]: https://github.com/Morwenn/cpp-sort/wiki/Benchmarks
+  [drop-merge-adapter]: https://github.com/Morwenn/cpp-sort/wiki/Sorter-adapters#drop_merge_adapter
+  [heap-sorter]: https://github.com/Morwenn/cpp-sort/wiki/Sorters#heap_sorter
+  [split-adapter]: https://github.com/Morwenn/cpp-sort/wiki/Sorter-adapters#split_adapter
diff --git a/benchmarks/benchmarking-tools/distributions.h b/benchmarks/benchmarking-tools/distributions.h
@@ -359,9 +359,9 @@ namespace dist
 
             for (long long int i = 0 ; i < size ; ++i) {
                 if (percent_dis(distributions_prng) < factor) {
-                    *out++ = value_dis(distributions_prng);
+                    *out++ = proj(value_dis(distributions_prng));
                 } else {
-                    *out++ = i;
+                    *out++ = proj(i);
                 }
             }
         }

diff --git a/benchmarks/inversions/inv-bench.cpp b/benchmarks/inversions/inv-bench.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021 Morwenn
+ * Copyright (c) 2020-2022 Morwenn
  * SPDX-License-Identifier: MIT
  */
 #include <cassert>
@@ -17,7 +17,6 @@
 #include "../benchmarking-tools/distributions.h"
 #include "../benchmarking-tools/filesystem.h"
 #include "../benchmarking-tools/rdtsc.h"
-#include "../benchmarking-tools/statistics.h"
 
 using namespace std::chrono_literals;
 
@@ -34,14 +33,14 @@ using sort_f = void (*)(collection_t&);
 std::pair<std::string, sort_f> sorts[] = {
     { "drop_merge_sort",    cppsort::drop_merge_sort    },
     { "pdq_sort",           cppsort::pdq_sort           },
-    { "split_sort",         cppsort::split_sort         }
+    { "split_sort",         cppsort::split_sort         },
 };
 
 // Size of the collections to sort
 constexpr std::size_t size = 1'000'000;
 
 // Maximum time to let the benchmark run for a given size before giving up
-auto max_run_time = 3s;
+auto max_run_time = 5s;
 // Maximum number of benchmark runs per size
 std::size_t max_runs_per_size = 25;
 
@@ -68,18 +67,24 @@ int main(int argc, char* argv[])
     std::uint_fast32_t seed = std::time(nullptr);
     std::cout << "SEED: " << seed << '\n';
 
+    int sort_number = 0;
     for (auto& sort: sorts) {
         // Create a file to store the results
-        std::string output_filename = output_directory + '/' + safe_file_name(sort.first) + ".csv";
-        std::ofstream output_file(output_filename);
+        auto sort_number_str = std::to_string(sort_number);
+        auto output_filename =
+            std::string(3 - sort_number_str.size(), '0') +
+            std::move(sort_number_str) +
+            '-' + safe_file_name(sort.first) + ".csv";
+        std::string output_path = output_directory + '/' + output_filename;
+        std::ofstream output_file(output_path);
         output_file << sort.first << '\n';
         std::cout << sort.first << '\n';
 
         // Seed the distribution manually to ensure that all algorithms
         // sort the same collections when there is randomness
         distributions_prng.seed(seed);
 
-        for (int idx = 0 ; idx <= 100 ; ++idx) {
+        for (int idx = 0; idx <= 100; ++idx) {
             double factor = 0.01 * idx;
             auto distribution = dist::inversions(factor);
 
@@ -100,9 +105,18 @@ int main(int argc, char* argv[])
             }
 
             // Compute and display stats & numbers
-            double avg = average(cycles);
-            output_file << idx << ", " << avg << '\n';
-            std::cout << idx << ", " << avg << std::endl;
+            output_file << idx << ",";
+            std::cout << idx << ",";
+            auto it = cycles.begin();
+            output_file << *it;
+            std::cout << *it;
+            while (++it != cycles.end()) {
+                output_file << "," << *it;
+                std::cout << "," << *it;
+            }
+            output_file << '\n';
+            std::cout << std::endl;
         }
+        ++sort_number;
     }
 }
diff --git a/benchmarks/inversions/plot.py b/benchmarks/inversions/plot.py
@@ -1,10 +1,11 @@
 # -*- coding: utf-8 -*-
 
-# Copyright (c) 2020-2021 Morwenn
+# Copyright (c) 2020-2022 Morwenn
 # SPDX-License-Identifier: MIT
 
 import argparse
 import pathlib
+import sys
 
 import numpy
 from matplotlib import pyplot
@@ -15,7 +16,7 @@ def fetch_results(fresults):
     results.pop()
     return [float(elem) for elem in results]
 
-    
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description="Plot the results of the errorbar-plot benchmark.")
     parser.add_argument('root', help="directory with the result files to plot")
@@ -26,6 +27,7 @@ def fetch_results(fresults):
 
     root = pathlib.Path(args.root)
     result_files = list(root.glob('*.csv'))
+    result_files.sort()
     if len(result_files) == 0:
         print(f"There are no files to plot in {root}")
         sys.exit(1)
@@ -42,22 +44,27 @@ def fetch_results(fresults):
     colors = iter(palette)
 
     for result_file in result_files:
+        percent_inversions = []
+        averages = []
         with result_file.open() as fd:
             # Read the first line
             algo_name = fd.readline().strip()
             # Read the rest of the file
-            data = numpy.genfromtxt(fd, delimiter=',').transpose()
-            percent_inversions, avg = data
+            for line in fd:
+                pct, *data = line.strip().split(',')
+                data = list(map(int, data))
+                percent_inversions.append(pct)
+                averages.append(numpy.average(data))
 
         # Plot the results
-        pyplot.plot(percent_inversions,
-                    avg,
+        pyplot.plot(list(map(int, percent_inversions)),
+                    averages,
                     label=algo_name,
                     color=next(colors))
 
     # Add a legend
-    pyplot.legend(loc='best')
-    pyplot.title('Sorting std::vector<int> with $10^6$ elements')
-    pyplot.xlabel('Percentage of inversions')
-    pyplot.ylabel('Cycles (lower is better)')
+    pyplot.legend()
+    pyplot.title("Sorting std::vector<int> with $10^6$ elements")
+    pyplot.xlabel("Percentage of inversions")
+    pyplot.ylabel("Cycles (lower is better)")
     pyplot.show()
diff --git a/benchmarks/small-array/benchmark.cpp b/benchmarks/small-array/benchmark.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2021 Morwenn
+ * Copyright (c) 2015-2022 Morwenn
  * SPDX-License-Identifier: MIT
  */
 #include <algorithm>
@@ -46,8 +46,10 @@ template<
     typename DistributionFunction
 >
 auto time_it(Sorter sorter, DistributionFunction distribution)
-    -> double
+    -> std::uint64_t
 {
+    static_assert(N > 0, "this benchmark does not support zero-sized arrays");
+
     // Seed the distribution manually to ensure that all algorithms
     // sort the same collections when there is randomness
     distributions_prng.seed(seed);
@@ -65,53 +67,53 @@ auto time_it(Sorter sorter, DistributionFunction distribution)
         sorter(arr);
         std::uint64_t end = rdtsc();
         assert(std::is_sorted(arr.begin(), arr.end()));
-        cycles.push_back(end - start);
+        cycles.push_back(double(end - start) / N);
         total_end = clock_type::now();
     }
 
-    // Return the average number of cycles it took to sort the arrays
-    std::uint64_t avg = 0;
-    for (auto value: cycles) {
-        avg += value;
-    }
-    return avg / double(cycles.size());
+    // Return the median number of cycles per element
+    auto cycles_median = cycles.begin() + cycles.size() / 2;
+    std::nth_element(cycles.begin(), cycles_median, cycles.end());
+    return *cycles_median;
 }
 
 template<
     typename T,
-    typename Distribution,
+    typename Dist,
     std::size_t... Ind
 >
 auto time_distribution(std::index_sequence<Ind...>)
     -> void
 {
-    using sorting_network_sorter = cppsort::small_array_adapter<
-        cppsort::sorting_network_sorter
-    >;
-
     using low_comparisons_sorter = cppsort::small_array_adapter<
         cppsort::low_comparisons_sorter
     >;
-
     using low_moves_sorter = cppsort::small_array_adapter<
         cppsort::low_moves_sorter
     >;
+    using merge_exchange_network_sorter = cppsort::small_array_adapter<
+        cppsort::merge_exchange_network_sorter
+    >;
+    using sorting_network_sorter = cppsort::small_array_adapter<
+        cppsort::sorting_network_sorter
+    >;
 
     // Compute results for the different sorting algorithms
-    std::pair<const char*, std::array<double, sizeof...(Ind)>> results[] = {
-        { "insertion_sorter",       { time_it<T, Ind>(cppsort::insertion_sort,  Distribution{})... } },
-        { "selection_sorter",       { time_it<T, Ind>(cppsort::selection_sort,  Distribution{})... } },
-        { "low_moves_sorter",       { time_it<T, Ind>(low_moves_sorter{},       Distribution{})... } },
-        { "low_comparisons_sorter", { time_it<T, Ind>(low_comparisons_sorter{}, Distribution{})... } },
-        { "sorting_network_sorter", { time_it<T, Ind>(sorting_network_sorter{}, Distribution{})... } },
+    std::pair<const char*, std::array<std::uint64_t, sizeof...(Ind)>> results[] = {
+        { "insertion_sorter",               { time_it<T, Ind + 1>(cppsort::insertion_sort,          Dist{})... } },
+        { "selection_sorter",               { time_it<T, Ind + 1>(cppsort::selection_sort,          Dist{})... } },
+        { "low_comparisons_sorter",         { time_it<T, Ind + 1>(low_comparisons_sorter{},         Dist{})... } },
+        { "low_moves_sorter",               { time_it<T, Ind + 1>(low_moves_sorter{},               Dist{})... } },
+        { "merge_exchange_network_sorter",  { time_it<T, Ind + 1>(merge_exchange_network_sorter{},  Dist{})... } },
+        { "sorting_network_sorter",         { time_it<T, Ind + 1>(sorting_network_sorter{},         Dist{})... } },
     };
 
     // Output the results to their respective files
     std::ofstream output(Distribution::output);
     for (auto&& sort_result: results) {
-        output << std::get<0>(sort_result) << ' ';
+        output << std::get<0>(sort_result) << ',';
         for (auto&& nb_cycles: std::get<1>(sort_result)) {
-            output << nb_cycles << ' ';
+            output << nb_cycles << ',';
         }
         output << '\n';
     }
@@ -125,7 +127,7 @@ template<
 auto time_distributions()
     -> void
 {
-    using indices = std::make_index_sequence<N>;
+    using indices = std::make_index_sequence<N - 1>;
 
     // Variadic dispatch only works with expressions
     int dummy[] = {