WIP: data preprocessing functionality and tests

stillwater-sc · May 3, 2023 · ec7067c
1 parent 26b3208
commit ec7067c
Show file tree

Hide file tree

Showing 4 changed files with 44 additions and 15 deletions.
diff --git a/include/universal/blas/scaling.hpp b/include/universal/blas/scaling.hpp
@@ -78,14 +78,17 @@ blas::vector<Target> compress(const blas::vector<double>& v) {
 	auto maxpos = double(std::numeric_limits<Target>::max());
 
 	auto vminmax = arange(v);
-	auto minValue = vminmax.first;
+//	auto minValue = vminmax.first;
 	auto maxValue = vminmax.second;
 
 	sw::universal::blas::vector<Target> t(v.size());
 	auto sqrtMaxpos = sqrt(maxpos);
+	//std::cout << "maxValue : " << maxValue << " sqrt(maxpos) : " << sqrtMaxpos << '\n';
 	double maxScale = 1.0;
-	if (abs(maxValue) > sqrtMaxpos) maxScale = sqrtMaxpos / maxValue;
+	if (abs(maxValue) >= sqrtMaxpos) maxScale = sqrtMaxpos / maxValue;
+	//std::cout << "scale factor      : " << maxScale << '\n';
 	t = maxScale * v;
+	//std::cout << "compressed vector : " << t << '\n';
 
 	return t;
 }

diff --git a/include/universal/number/cfloat/numeric_limits.hpp b/include/universal/number/cfloat/numeric_limits.hpp
@@ -63,7 +63,7 @@ class numeric_limits< sw::universal::cfloat<nbits, es, bt, hasSubnormals, hasSup
 	static constexpr bool has_infinity  = true;
 	static constexpr bool has_quiet_NaN = true;
 	static constexpr bool has_signaling_NaN = true;
-	static constexpr float_denorm_style has_denorm = denorm_absent;
+	static constexpr float_denorm_style has_denorm = (hasSubnormals ? denorm_present : denorm_absent);
 	static constexpr bool has_denorm_loss = false;
 
 	static constexpr bool is_iec559 = false;

diff --git a/include/universal/traits/arithmetic_traits.hpp b/include/universal/traits/arithmetic_traits.hpp
@@ -23,7 +23,13 @@ namespace sw { namespace universal {
 		std::stringstream str;
 		str << std::left << std::setw(WIDTH_TYPE_TAG) << type_tag(Ty());
 		str << " : ";
-		str << "min " << std::setw(13) << std::numeric_limits<Ty>::min() << "     ";
+		if (std::numeric_limits<Ty>::has_denorm == std::float_denorm_style::denorm_absent) {
+			str << "min " << std::setw(13) << std::numeric_limits<Ty>::min() << "     ";
+		}
+		else {
+			str << "min " << std::setw(13) << std::numeric_limits<Ty>::denorm_min() << "     ";
+		}
+
 		str << "max " << std::setw(13) << std::numeric_limits<Ty>::max() << "     ";
 		return str.str();
 	}

diff --git a/linalg/data/scaling.cpp b/linalg/data/scaling.cpp
@@ -22,6 +22,7 @@ int VerifyRange(bool reportTestCases = false) {
 	using namespace sw::universal;
 	using namespace sw::universal::blas;
 
+	std::cerr << "VerifyRange\n" << minmax_range<Scalar>() << '\n';
 	int nrFailedTests{ 0 };
 	Scalar maxneg = std::numeric_limits<Scalar>::lowest();
 	Scalar maxpos = std::numeric_limits<Scalar>::max();
@@ -32,7 +33,6 @@ int VerifyRange(bool reportTestCases = false) {
 	v.push_back(std::numeric_limits<Scalar>::min());
 	v.push_back(maxpos);
 
-
 	auto minmax = range(v);
 	if (minmax.first != maxneg && minmax.second != maxpos) {
 		++nrFailedTests;
@@ -55,6 +55,7 @@ int VerifyCompress(bool reportTestCases = false) {
 	using namespace sw::universal;
 	using namespace sw::universal::blas;
 
+	std::cerr << "VerifyCompress\n" << minmax_range<Scalar>() << '\n';
 	int nrFailedTests{ 0 };
 
 	// to validate that compress() works, we are going to create a
@@ -64,27 +65,40 @@ int VerifyCompress(bool reportTestCases = false) {
 
 	// we are going to assume that the target arithmetic can represent
 	// normal distributed data with zero mean and stddev of 1.0
-	unsigned N{ 100 };
+	unsigned N{ 20 };
 	double mean{ 0.0 };
 	double stddev{ 1.0 };
 	vector<double> v = gaussian_random_vector<double>(N, mean, stddev);
+	if (N < 20) std::cout << "original vector   : " << v << '\n';
+
+	auto maxpos = double(std::numeric_limits<Scalar>::max());
+	auto vminmax = arange(v);
+	auto maxValue = vminmax.second;
+	auto scale = sqrt(maxpos) / maxValue;
+	// scale the original to 'fill' 75% of the dynamic range of the target scale
+	if (N < 20) std::cout << "scale up          : " << scale << '\n';
+	v *= scale;
+	// assign it to the target type
 	vector<Scalar> ref(v);
-	v = ref; // convert the reference to double
-	double scale = double(sqrt(std::numeric_limits<Scalar>::max()));
-	v *= scale; // scale the data
+	v = ref; // convert the double vector to the target reference
+	if (N < 20) std::cout << "converted vector  : " << v << '\n';
+
 	vector<Scalar> compressed = compress<Scalar>(v);
-	if (compressed != ref) {
-		++nrFailedTests;
-		if (reportTestCases) std::cerr << "compressed vector is not equal to reference\n";
-		for (unsigned i = 0; i < 20; ++i) {
-			std::cerr << i << " : " << compressed[i] << " vs " << ref[i] << '\n';
+	if (N < 20) {
+		std::cout << "compressed vector : " << compressed << '\n';
+		for (auto e : compressed) std::cout << to_binary(e) << " : " << e << '\n';
+
+		for (unsigned i = 0; i < N; ++i) {
+			auto factor = double(compressed[i]) / v[i];
+			std::cout << i << " : " << factor << '\n';
 		}
 	}
+
 	return nrFailedTests;
 }
 
 // Regression testing guards: typically set by the cmake configuration, but MANUAL_TESTING is an override
-#define MANUAL_TESTING 1
+#define MANUAL_TESTING 0
 // REGRESSION_LEVEL_OVERRIDE is set by the cmake file to drive a specific regression intensity
 // It is the responsibility of the regression test to organize the tests in a quartile progression.
 //#undef REGRESSION_LEVEL_OVERRIDE
@@ -112,6 +126,12 @@ try {
 
 #if MANUAL_TESTING
 
+	std::cout << minmax_range<float>() << '\n';
+	std::cout << minmax_range<half>() << '\n';  // has subnormals
+	std::cout << minmax_range<cfloat<16, 5, uint16_t, false, false, false> >() << '\n'; // no subnormals
+	std::cout << minmax_range<quarter>() << '\n'; // has subnormals
+	std::cout << minmax_range<cfloat<8, 2, uint8_t, false, false, false> >() << '\n'; // no subnormals
+
 	// manual test cases
 	nrOfFailedTestCases += ReportTestResult(VerifyCompress<half>(reportTestCases), "compress to half precision", "half precision");
 	nrOfFailedTestCases += ReportTestResult(VerifyCompress<quarter>(reportTestCases), "compress to quarter precision", "quarter precision");