bug fix in amin/amax BLAS and first pass scaling

stillwater-sc · Apr 26, 2023 · e5e8449 · e5e8449
1 parent c5800d7
commit e5e8449
Show file tree

Hide file tree

Showing 3 changed files with 66 additions and 19 deletions.
diff --git a/benchmark/error/blas/dot.cpp b/benchmark/error/blas/dot.cpp
@@ -43,24 +43,66 @@ void DotProductError(const sw::universal::blas::vector<double>& x, double minx,
 	using namespace sw::universal; 
 	std::cout << "\nScalar type : " << type_tag(Scalar()) << '\n';
 
-	auto min = std::numeric_limits<Scalar>::min();
-	auto max = std::numeric_limits<Scalar>::max();
-	auto scale = (max * 0.5) / maxx;
+	auto minpos = double(std::numeric_limits<Scalar>::min());
+	auto maxpos = double(std::numeric_limits<Scalar>::max());
+	auto maxxy = std::max(maxx, maxy);
+	auto focus{ 1.0 }, expand{ 1.0 };
+	if (maxxy*maxxy > maxpos) { // need to scale the vectors
+		double upperbound = sqrt(maxpos);
+		focus = upperbound / maxxy;
+		expand = maxxy / upperbound;  // is this a more precise calculation than (1.0 / scale)? 
+		                              // Yes, but how much more precise? The trouble spots are at the extremes of the range
+
+		// check for underflow
+		auto smallestScaledx = focus * minx;
+		auto smallestScaledy = focus * miny;
+		auto smallestScaledElement = std::min(smallestScaledx, smallestScaledy);
+		if (focus * minx < minpos || focus * miny < minpos) {
+			std::cout << "Scaling is causing underflow: " << smallestScaledElement << " < " << minpos << '\n';
+		}
+	}
 	auto nrSamples = size(x);
 	blas::vector<Scalar> xx(nrSamples);
-	xx = x;
+	xx = focus * x;
 	blas::vector<Scalar> yy(nrSamples);
-	yy = y;
+	yy = focus * y;
 
 	double real = x * y;
-	double sample = double(xx * yy);
+	double sample = double(xx * yy) * expand;
 	TraceProducts(xx, yy);
 	double dotError = log(real / sample);
 	constexpr unsigned COLWIDTH = 15;
-	if constexpr (verbose) std::cout << std::setw(10) << real << std::setw(COLWIDTH) << sample << std::setw(COLWIDTH) << (real / sample) << std::setw(COLWIDTH) << dotError << '\n';
+	if constexpr (verbose) {
+		std::cout << std::setw(10) << "Reference" << std::setw(COLWIDTH) << "Target Type" << std::setw(COLWIDTH) << "Ratio" << std::setw(COLWIDTH) << "ln(ratio)" << '\n';
+		std::cout << std::setw(10) << real << std::setw(COLWIDTH) << sample << std::setw(COLWIDTH) << (real / sample) << std::setw(COLWIDTH) << dotError << '\n';
+	}
 	else std::cout << "DOT product sampling error : " << dotError << '\n';
 }
 
+void TestSampleError(unsigned N = 10000, double mean = 0.0, double stddev = 2.0) {
+	using namespace sw::universal;
+
+	auto x = sw::universal::blas::gaussian_random_vector<double>(N, mean, stddev);
+	auto y = sw::universal::blas::gaussian_random_vector<double>(N, mean, stddev);
+
+	double minx = x[blas::amin(N, x, 1)];
+	double maxx = x[blas::amax(N, x, 1)];
+	double miny = y[blas::amin(N, y, 1)];
+	double maxy = y[blas::amax(N, y, 1)];
+	constexpr bool Verbose = true;
+	DotProductError< duble >(x, minx, maxx, y, miny, maxy);
+	DotProductError< single >(x, minx, maxx, y, miny, maxy);
+	DotProductError< half >(x, minx, maxx, y, miny, maxy);
+	DotProductError< cfloat<8, 2, uint8_t, true, false> >(x, minx, maxx, y, miny, maxy);
+	DotProductError< cfloat<8, 3, uint8_t, true, false> >(x, minx, maxx, y, miny, maxy);
+	DotProductError< cfloat<8, 4, uint8_t, true, false> >(x, minx, maxx, y, miny, maxy);
+	DotProductError< cfloat<8, 5, uint8_t, true, false> >(x, minx, maxx, y, miny, maxy);
+	DotProductError< cfloat<8, 2, uint8_t, true, true> >(x, minx, maxx, y, miny, maxy);
+	DotProductError< cfloat<8, 3, uint8_t, true, true> >(x, minx, maxx, y, miny, maxy);
+	DotProductError< cfloat<8, 4, uint8_t, true, true> >(x, minx, maxx, y, miny, maxy);
+	DotProductError< cfloat<8, 5, uint8_t, true, true> >(x, minx, maxx, y, miny, maxy);
+}
+
 void SampleError(unsigned N = 10000, double mean = 0.0, double stddev = 2.0) {
 	using namespace sw::universal;
 
@@ -97,6 +139,8 @@ try {
 	using namespace sw::universal;
 
 	unsigned N{ 10000 };
+	TestSampleError(N, 0.0, 1.0);
+	return 0;
 	SampleError(N, 0.0, 1.0);
 	SampleError(N, 0.0, 2.0);
 	SampleError(N, 0.0, 5.0);

diff --git a/include/universal/blas/blas_l1.hpp b/include/universal/blas/blas_l1.hpp
@@ -124,27 +124,29 @@ void swap(size_t n, Vector& x, size_t incx, Vector& y, size_t incy) {
 
 // find the index of the element with maximum absolute value
 template<typename Vector>
-size_t amax(size_t n, const Vector& x, size_t incx) {
-	typename Vector::value_type running_max = -INFINITY;
-	size_t ix, index;
-	for (ix = 0; ix < size(x); ix += incx) {
-		if (x[ix] > running_max) {
+size_t amax(size_t n, const Vector& x, size_t incx = 1) {
+	size_t ix{ 0 }, index{ 0 };
+	auto running_max = abs(x[ix]);
+	for (ix = 1; ix < size(x); ix += incx) {
+		auto absolute = abs(x[ix]);
+		if (absolute > running_max) {
 			index = ix;
-			running_max = x[ix];
+			running_max = absolute;
 		}
 	}
 	return index;
 }
 
 // find the index of the element with minimum absolute value
 template<typename Vector>
-size_t amin(size_t n, const Vector& x, size_t incx) {
-	typename Vector::value_type running_min = INFINITY;
-	size_t ix, index;
-	for (ix = 0; ix < size(x); ix += incx) {
-		if (x[ix] < running_min) {
+size_t amin(size_t n, const Vector& x, size_t incx = 1) {
+	size_t ix{ 0 }, index{ 0 };
+	auto running_min = abs(x[ix]);
+	for (ix = 1; ix < size(x); ix += incx) {
+		auto absolute = abs(x[ix]);
+		if (absolute < running_min) {
 			index = ix;
-			running_min = x[ix];
+			running_min = absolute;
 		}
 	}
 	return index;

diff --git a/include/universal/number/integer/integer_impl.hpp b/include/universal/number/integer/integer_impl.hpp
@@ -75,6 +75,7 @@ bool parse(const std::string& number, integer<nbits, BlockType, NumberType>& v);
 // idiv_t for integer<nbits, BlockType, NumberType> to capture quotient and remainder during long division
 template<unsigned nbits, typename BlockType, IntegerNumberType NumberType>
 struct idiv_t {
+	idiv_t() : quot{ 0 }, rem{ 0 } {};
 	integer<nbits, BlockType, NumberType> quot; // quotient
 	integer<nbits, BlockType, NumberType> rem;  // remainder
 };