WIP: building an error benchmarking structure for sampling, scaling, …

…and linear algebra
stillwater-sc · May 1, 2023 · 24b6b43 · 24b6b43
1 parent 516c474
commit 24b6b43
Show file tree

Hide file tree

Showing 6 changed files with 204 additions and 88 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -818,6 +818,7 @@ endif(BUILD_MIXEDPRECISION_TENSOR)
 # error benchmarks
 if(BUILD_BENCHMARK_ERROR)
 add_subdirectory("benchmark/error/sampling")
+add_subdirectory("benchmark/error/scaling")
 add_subdirectory("benchmark/error/blas")
 endif(BUILD_BENCHMARK_ERROR)
 

diff --git a/benchmark/error/blas/dot.cpp b/benchmark/error/blas/dot.cpp
@@ -138,23 +138,44 @@ namespace sw {
 	namespace universal {
 		// data normalization
 
-		// MinMaxScaler rescales the elements of a vector from their original 
+		// minmaxscaler rescales the elements of a vector from their original 
 		// range [min, max] to a new range [lb, ub]
 		template<typename Scalar>
-		blas::vector<Scalar> minmaxScaler(const blas::vector<Scalar>& v, Scalar lb = 0, Scalar ub = 1) {
-			if (lb < ub) {
+		blas::vector<Scalar> minmaxscaler(const blas::vector<Scalar>& v, Scalar lb = 0, Scalar ub = 1) {
+			blas::vector<Scalar> t; 
+			if (lb >= ub) {
 				std::cerr << "target range is inconsistent\n";
+				return t;
 			}
-			auto min = abs(v[blas::amin(v.size(), v)]);
-			auto max = abs(v[blas::amax(v.size(), v)]);
-			auto mapto = (ub - lb) / (max - min);
-			blas::vector<Scalar> t;
+			std::pair< Scalar, Scalar> mm = blas::range(v);
+			Scalar min = mm.first;
+			Scalar max = mm.second;
+			auto scale = (ub - lb) / (max - min);
+			auto offset = lb - min * scale;
+			std::cout << min << ", " << max << ", " << lb << ", " << ub << ", " << scale << ", " << offset << '\n';
 			for (auto e : v) {
-				t.push_back( (e - min) * mapto );
+				t.push_back( e * scale + offset );
 			}
 			return t;
 		}
 
+		template<typename Target>
+		blas::vector<Target> compress(const blas::vector<double>& v) {
+			auto maxpos = double(std::numeric_limits<Target>::max());
+
+			auto vminmax = arange(v);
+			auto minValue = vminmax.first;
+			auto maxValue = vminmax.second;
+
+			sw::universal::blas::vector<Target> t(v.size());
+			auto sqrtMaxpos = sqrt(maxpos);
+			double maxScale = 1.0;
+			if (abs(maxValue) > sqrtMaxpos) maxScale = sqrtMaxpos / maxValue;
+			t = maxScale * v;
+
+			return t;
+		}
+
 	}
 }
 
@@ -168,94 +189,19 @@ namespace sw {
  * the risk of overflow and underflow of the products is the first problem
  * to solve. Secondly, for long vectors overflow and catastrophic cancellation
  * are also risks.
- * 
- * Assume we have a vector x like this
- * 
- *                  *
- *                 ***
- *             ***********
- *       *********************** 
- * -----------------+--------------------
- * |     ^          0^         ^        |
- * |  minneg        min       max       |
- * minneg                             maxpos
- *          |-------------|
- *       minneg        maxpos of target number system
- * 
- * we need to 'squeeze' 
- *    max to sqrt(maxpos) of target system
- *    min to sqrt(minpos) of target system
- * which ever is more constraining;
- * 
- * maxScale = sqrt(maxpos) / max
- * minScale = sqrt(minpos) / min
- *                  
+ *                 
  */
 
-template<typename Real>
-std::pair<Real, Real> minmax(const sw::universal::blas::vector<Real>& v) {
-	auto minValue = abs(v[sw::universal::blas::amin(v.size(), v)]);
-	auto maxValue = abs(v[sw::universal::blas::amax(v.size(), v)]);
-	std::cout << "minValue  : " << minValue << '\n';
-	std::cout << "maxValue  : " << maxValue << '\n';
-	return std::pair(minValue, maxValue);
-}
-
-template<typename Target>
-sw::universal::blas::vector<Target> squeeze(const sw::universal::blas::vector<double>& v) {
-	auto minpos = double(std::numeric_limits<Target>::min());
-	auto maxpos = double(std::numeric_limits<Target>::max());
-
-	auto vminmax = minmax(v);
-	auto minValue = vminmax.first;
-	auto maxValue = vminmax.second;
-
-	auto sqrtMinpos = sqrt(minpos);
-	auto sqrtMaxpos = sqrt(maxpos);
-
-	auto minScale = sqrtMinpos / minValue;
-	auto maxScale = sqrtMaxpos / maxValue;
-
-	std::cout << "minScale  : " << minScale << '\n';
-	std::cout << "maxScale  : " << maxScale << '\n';
-
-	sw::universal::blas::vector<Target> t(v.size());
-	if (abs(maxValue) < sqrtMaxpos) maxScale = 1.0; // no need to scale
-	t = maxScale * v;
-
-	return t;
-}
 
 int main()
 try {
 	using namespace sw::universal;
 
-	unsigned N{ 14 };
+	unsigned N{ 10000 };
 	double mean{ 0.0 }, stddev{ 1.0 };
 
-	auto dv = sw::universal::blas::gaussian_random_vector<double>(N, mean, stddev);
-	auto dminmax = minmax(dv);
-	auto dminmaxScaled = minmaxScaler(dv);
-
-	auto sv = squeeze<float>(dv);
-	auto sminmax = minmax(sv);
-
-	auto hv = squeeze<half>(dv);
-	auto hminmax = minmax(hv);
-
-	auto qv = squeeze<quarter>(dv);
-	auto qminmax = minmax(qv);
-
-	if (N < 15) {
-		std::cout << dv << '\n';
-		std::cout << dminmaxScaled << '\n';
-		std::cout << sv << '\n';
-		std::cout << hv << '\n';
-		std::cout << qv << '\n';
-	}
+	TestSampleError(N, 0.0, 1.0);
 
-	//TestSampleError(N, 0.0, 1.0);
-	return 0;
 	SampleError(N, 0.0, 1.0);
 	SampleError(N, 0.0, 2.0);
 	SampleError(N, 0.0, 5.0);

diff --git a/benchmark/error/scaling/CMakeLists.txt b/benchmark/error/scaling/CMakeLists.txt
@@ -0,0 +1,3 @@
+file (GLOB SOURCES "./*.cpp")
+
+compile_all("true" "error" "Benchmarks/Error/scaling" "${SOURCES}")
diff --git a/benchmark/error/scaling/scaling.cpp b/benchmark/error/scaling/scaling.cpp
@@ -0,0 +1,122 @@
+// scaling.cpp: error measurement of data scaling to fit small and narrow representations
+//
+// Copyright (C) 2022-2023 Stillwater Supercomputing, Inc.
+//
+// This file is part of the universal numbers project, which is released under an MIT Open Source license.
+#include <universal/utility/directives.hpp>
+
+#include <universal/number/integer/integer.hpp>
+#include <universal/number/fixpnt/fixpnt.hpp>
+#include <universal/number/cfloat/cfloat.hpp>
+#include <universal/number/posit/posit.hpp>
+#include <universal/number/lns/lns.hpp>
+#include <universal/blas/blas.hpp>
+
+namespace sw {
+	namespace universal {
+		// data normalization
+
+		// minmaxscaler rescales the elements of a vector from their original 
+		// range [min, max] to a new range [lb, ub]
+		template<typename Scalar>
+		blas::vector<Scalar> minmaxscaler(const blas::vector<Scalar>& v, Scalar lb = 0, Scalar ub = 1) {
+			blas::vector<Scalar> t; 
+			if (lb >= ub) {
+				std::cerr << "target range is inconsistent\n";
+				return t;
+			}
+			std::pair< Scalar, Scalar> mm = blas::range(v);
+			Scalar min = mm.first;
+			Scalar max = mm.second;
+			auto scale = (ub - lb) / (max - min);
+			auto offset = lb - min * scale;
+			std::cout << min << ", " << max << ", " << lb << ", " << ub << ", " << scale << ", " << offset << '\n';
+			for (auto e : v) {
+				t.push_back( e * scale + offset );
+			}
+			return t;
+		}
+
+		template<typename Target>
+		blas::vector<Target> compress(const blas::vector<double>& v) {
+			auto maxpos = double(std::numeric_limits<Target>::max());
+
+			auto vminmax = arange(v);
+			auto minValue = vminmax.first;
+			auto maxValue = vminmax.second;
+
+			sw::universal::blas::vector<Target> t(v.size());
+			auto sqrtMaxpos = sqrt(maxpos);
+			double maxScale = 1.0;
+			if (abs(maxValue) > sqrtMaxpos) maxScale = sqrtMaxpos / maxValue;
+			t = maxScale * v;
+
+			return t;
+		}
+
+	}
+}
+
+/*
+ * When we want to take arbitrary vectors and want to faithfully calculate a 
+ * dot product using lower precision types, we need to 'squeeze' the values
+ * of the original vector such that the computational dynamics of the dot product
+ * can be emulated. 
+ * 
+ * When you think about very constrained types like 8-bit floating-point formats
+ * the risk of overflow and underflow of the products is the first problem
+ * to solve. Secondly, for long vectors overflow and catastrophic cancellation
+ * are also risks.
+ *                 
+ */
+
+
+int main()
+try {
+	using namespace sw::universal;
+
+	unsigned N{ 10000 };
+	double mean{ 0.0 }, stddev{ 1.0 };
+
+	auto dv = sw::universal::blas::gaussian_random_vector<double>(N, mean, stddev);
+		auto dminmax = blas::range(dv);
+		std::cout << dminmax.first << ", " << dminmax.second << '\n';
+	auto sv = compress<float>(dv);
+		auto sminmax = blas::range(sv);
+		std::cout << sminmax.first << ", " << sminmax.second << '\n';
+	auto hv = compress<half>(dv);
+		auto hminmax = blas::range(hv);
+		std::cout << hminmax.first << ", " << hminmax.second << '\n';
+	auto qv = compress<quarter>(dv);
+		auto qminmax = blas::range(qv);
+		std::cout << qminmax.first << ", " << qminmax.second << " : " << symmetry_range<quarter>() << '\n';
+
+	if (N < 15) {
+		std::cout << dv << '\n';
+		std::cout << sv << '\n';
+		std::cout << hv << '\n';
+		std::cout << qv << '\n';
+	}
+
+	return EXIT_SUCCESS;
+}
+catch (char const* msg) {
+	std::cerr << msg << std::endl;
+	return EXIT_FAILURE;
+}
+catch (const sw::universal::universal_arithmetic_exception& err) {
+	std::cerr << "Uncaught arithmetic exception: " << err.what() << std::endl;
+	return EXIT_FAILURE;
+}
+catch (const sw::universal::universal_internal_exception& err) {
+	std::cerr << "Uncaught internal exception: " << err.what() << std::endl;
+	return EXIT_FAILURE;
+}
+catch (const std::runtime_error& err) {
+	std::cerr << "Uncaught runtime exception: " << err.what() << std::endl;
+	return EXIT_FAILURE;
+}
+catch (...) {
+	std::cerr << "Caught unknown exception" << std::endl;
+	return EXIT_FAILURE;
+}
diff --git a/include/universal/blas/blas.hpp b/include/universal/blas/blas.hpp
@@ -3,7 +3,7 @@
 // Super-simple BLAS implementation to aid the application,
 // numerical, and reproducibility examples.
 //
-// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc.
+// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc.
 //
 // This file is part of the universal numbers project, which is released under an MIT Open Source license.
 #ifndef _UNIVERSAL_BLAS_LIBRARY
@@ -66,10 +66,11 @@ constexpr uint64_t SIZE_512G = 512 * SIZE_1G;
 #include <universal/blas/generators.hpp>
 
 // Utilities
+#include <universal/blas/scaling.hpp>
 #include <universal/blas/linspace.hpp>
 
 // MATLAB-style elementary vector functions
 #include <universal/blas/vmath/power.hpp>
 #include <universal/blas/vmath/trigonometry.hpp>
 
-#endif // _UNIVERSAL_BLAS_LIBRARY
+#endif // _UNIVERSAL_BLAS_LIBRARY
diff --git a/include/universal/blas/scaling.hpp b/include/universal/blas/scaling.hpp
@@ -0,0 +1,43 @@
+#pragma once
+// scaling.hpp: scaling functions for data preprocessing
+//
+// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc.
+//
+// This file is part of the universal numbers project, which is released under an MIT Open Source license.
+#include <cmath>
+#include <universal/math/math>  // injection of native IEEE-754 math library functions into sw::universal namespace
+
+namespace sw { namespace universal { namespace blas {
+
+// range returns the minimum and maximum value of the vector
+template<typename Vector>
+std::pair<typename Vector::value_type, typename Vector::value_type> range(const Vector& v, unsigned incx = 1) {
+	using Scalar = typename Vector::value_type;
+	if (v.size() == 0) return std::pair(Scalar(0), Scalar(0));
+	Scalar e = v[0];
+	Scalar running_min{ e }, running_max{ e };
+	for (unsigned i = 1; i < v.size(); ++i) {
+		e = v[i];
+		if (e < running_min) running_min = e;
+		if (e > running_max) running_max = e;
+	}
+	return std::pair(running_min, running_max);
+}
+
+// arange returns the absolute minimum and maximum value of the vector
+template<typename Vector>
+std::pair<typename Vector::value_type, typename Vector::value_type> arange(const Vector& v, unsigned incx = 1) {
+	using Scalar = typename Vector::value_type;
+	if (v.size() == 0) return std::pair(Scalar(0), Scalar(0));
+	Scalar e = abs(v[0]);
+	Scalar running_min{ e }, running_max{ e };
+	for (unsigned i = 1; i < v.size(); ++i) {
+		e = abs(v[i]);
+		if (e < running_min) running_min = e;
+		if (e > running_max) running_max = e;
+	}
+	return std::pair(running_min, running_max);
+}
+
+
+}}} // namespace sw::universal::blas