WIP: automatic squeezing of dot products using a linear transform

stillwater-sc · Apr 28, 2023 · ce455dd · ce455dd
1 parent e24c1ad
commit ce455dd
Showing 1 changed file with 91 additions and 2 deletions.
diff --git a/benchmark/error/blas/dot.cpp b/benchmark/error/blas/dot.cpp
@@ -134,12 +134,101 @@ void SampleError(unsigned N = 10000, double mean = 0.0, double stddev = 2.0) {
 	DotProductError< integer<8> >(x, minx, maxx, y, miny, maxy);
 }
 
+/*
+ * When we want to take arbitrary vectors and want to faithfully calculate a 
+ * dot product using lower precision types, we need to 'squeeze' the values
+ * of the original vector such that the computational dynamics of the dot product
+ * can be emulated. 
+ * 
+ * For very constrained types, such as 8-bit floating-point formats,
+ * overflow and underflow of the individual products is the first problem
+ * to solve. Secondly, for long vectors, overflow of the accumulated sum
+ * and catastrophic cancellation are also risks.
+ * 
+ * Assume we have a vector x like this
+ * 
+ *                  *
+ *                 ***
+ *             ***********
+ *       *********************** 
+ * -----------------+--------------------
+ * |     ^          0^         ^        |
+ * |  minneg        min       max       |
+ * minneg                             maxpos
+ *          |-------------|
+ *       minneg        maxpos of target number system
+ * 
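+ * The dot product multiplies pairs of elements, so to keep every product
+ * inside the target's range we need to 'squeeze'
+ *    max to sqrt(maxpos) of the target system
+ *    min to sqrt(minpos) of the target system
+ * whichever is more constraining:
+ * 
+ * maxScale = sqrt(maxpos) / max
+ * minScale = sqrt(minpos) / min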
+ *                  
+ */
+
+// Report the magnitudes of the elements selected by blas::amin and blas::amax.
+//
+// Returns a pair with abs(v[amin index]) in .first and abs(v[amax index]) in
+// .second. Both values are also echoed to std::cout as a diagnostic.
+// NOTE(review): presumably amin/amax return the index of the smallest/largest
+// magnitude element -- confirm against the blas header.
+template<typename Real>
+std::pair<Real, Real> minmax(const sw::universal::blas::vector<Real>& v) {
+	namespace blas = sw::universal::blas;
+	const auto lowIndex = blas::amin(v.size(), v);
+	auto lowest = abs(v[lowIndex]);
+	const auto highIndex = blas::amax(v.size(), v);
+	auto highest = abs(v[highIndex]);
+	std::cout << "minValue  : " << lowest << '\n'
+	          << "maxValue  : " << highest << '\n';
+	return std::pair(lowest, highest);
+}
+
+// Scale a double-precision vector so that its largest magnitude does not
+// exceed sqrt(maxpos) of the Target number system, then convert to Target.
+//
+// Template parameter:
+//   Target  number system of the returned vector; its range is read through
+//           std::numeric_limits<Target>::min() and ::max().
+// Parameter:
+//   v       the source vector in double precision.
+// Returns:
+//   a vector<Target> holding maxScale * v, where maxScale is 1.0 when the
+//   largest magnitude already fits and sqrt(maxpos) / maxValue otherwise.
+//
+// Diagnostics (min/max values and both candidate scales) go to std::cout.
+// minScale is only reported; it is not yet applied to the data (WIP).
+template<typename Target>
+sw::universal::blas::vector<Target> squeeze(const sw::universal::blas::vector<double>& v) {
+	// an empty vector has no extremes to inspect: nothing to scale
+	if (v.size() == 0) return sw::universal::blas::vector<Target>(v.size());
+
+	auto minpos = double(std::numeric_limits<Target>::min());
+	auto maxpos = double(std::numeric_limits<Target>::max());
+
+	// minmax() hands back magnitudes, so both values are already non-negative
+	auto vminmax = minmax(v);
+	auto minValue = vminmax.first;
+	auto maxValue = vminmax.second;
+
+	// products of two elements must stay in range, hence the square roots
+	auto sqrtMinpos = sqrt(minpos);
+	auto sqrtMaxpos = sqrt(maxpos);
+
+	auto minScale = sqrtMinpos / minValue;
+	auto maxScale = sqrtMaxpos / maxValue;
+
+	std::cout << "minScale  : " << minScale << '\n';
+	std::cout << "maxScale  : " << maxScale << '\n';
+
+	sw::universal::blas::vector<Target> t(v.size());
+	// maxValue is a magnitude already; compare it directly instead of calling an
+	// unqualified abs() that may bind to the integer overload and truncate
+	if (maxValue < sqrtMaxpos) maxScale = 1.0; // no need to scale
+	t = maxScale * v;
+
+	return t;
+}
+
 int main()
 try {
 	using namespace sw::universal;
 
-	unsigned N{ 10000 };
-	TestSampleError(N, 0.0, 1.0);
+	unsigned N{ 1000 };
+	double mean{ 0.0 }, stddev{ 1.0 };
+
+	auto dv = sw::universal::blas::gaussian_random_vector<double>(N, mean, stddev);
+	auto dminmax = minmax(dv);
+
+	auto sv = squeeze<float>(dv);
+	auto sminmax = minmax(sv);
+
+	auto hv = squeeze<half>(dv);
+	auto hminmax = minmax(hv);
+
+	auto qv = squeeze<quarter>(dv);
+	auto qminmax = minmax(qv);
+
+	if (N < 15) {
+		std::cout << dv << '\n';
+		std::cout << sv << '\n';
+		std::cout << hv << '\n';
+		std::cout << qv << '\n';
+	}
+
+	//TestSampleError(N, 0.0, 1.0);
 	return 0;
 	SampleError(N, 0.0, 1.0);
 	SampleError(N, 0.0, 2.0);